diff --git a/__pycache__/_config.cpython-37.pyc b/__pycache__/_config.cpython-37.pyc
deleted file mode 100644
index 8f84fb0..0000000
Binary files a/__pycache__/_config.cpython-37.pyc and /dev/null differ
diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
deleted file mode 100644
index 420c21a..0000000
Binary files a/__pycache__/barlow.cpython-37.pyc and /dev/null differ
diff --git a/__pycache__/barlow.cpython-38.pyc b/__pycache__/barlow.cpython-38.pyc
deleted file mode 100644
index f9d719e..0000000
Binary files a/__pycache__/barlow.cpython-38.pyc and /dev/null differ
diff --git a/__pycache__/barlow.cpython-39.pyc b/__pycache__/barlow.cpython-39.pyc
deleted file mode 100644
index 9d69311..0000000
Binary files a/__pycache__/barlow.cpython-39.pyc and /dev/null differ
diff --git a/__pycache__/barlow_utils.cpython-37.pyc b/__pycache__/barlow_utils.cpython-37.pyc
deleted file mode 100644
index b13b62f..0000000
Binary files a/__pycache__/barlow_utils.cpython-37.pyc and /dev/null differ
diff --git a/__pycache__/barlow_utils.cpython-38.pyc b/__pycache__/barlow_utils.cpython-38.pyc
deleted file mode 100644
index 89d8ded..0000000
Binary files a/__pycache__/barlow_utils.cpython-38.pyc and /dev/null differ
diff --git a/__pycache__/models.cpython-37.pyc b/__pycache__/models.cpython-37.pyc
deleted file mode 100644
index acc1737..0000000
Binary files a/__pycache__/models.cpython-37.pyc and /dev/null differ
diff --git a/__pycache__/models.cpython-38.pyc b/__pycache__/models.cpython-38.pyc
deleted file mode 100644
index 13fe121..0000000
Binary files a/__pycache__/models.cpython-38.pyc and /dev/null differ
diff --git a/__pycache__/models.cpython-39.pyc b/__pycache__/models.cpython-39.pyc
deleted file mode 100644
index ca374b0..0000000
Binary files a/__pycache__/models.cpython-39.pyc and /dev/null differ
diff --git a/__pycache__/t_dataset.cpython-37.pyc b/__pycache__/t_dataset.cpython-37.pyc
deleted file mode 100644
index c4b566b..0000000
Binary files a/__pycache__/t_dataset.cpython-37.pyc and /dev/null differ
diff --git a/__pycache__/t_dataset.cpython-38.pyc b/__pycache__/t_dataset.cpython-38.pyc
deleted file mode 100644
index 1814434..0000000
Binary files a/__pycache__/t_dataset.cpython-38.pyc and /dev/null differ
diff --git a/__pycache__/t_dataset.cpython-39.pyc b/__pycache__/t_dataset.cpython-39.pyc
deleted file mode 100644
index 245625a..0000000
Binary files a/__pycache__/t_dataset.cpython-39.pyc and /dev/null differ
diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
deleted file mode 100644
index b5b1fb5..0000000
Binary files a/__pycache__/train_translation.cpython-37.pyc and /dev/null differ
diff --git a/__pycache__/train_translation.cpython-38.pyc b/__pycache__/train_translation.cpython-38.pyc
deleted file mode 100644
index 413cf61..0000000
Binary files a/__pycache__/train_translation.cpython-38.pyc and /dev/null differ
diff --git a/__pycache__/train_translation.cpython-39.pyc b/__pycache__/train_translation.cpython-39.pyc
deleted file mode 100644
index ae42fee..0000000
Binary files a/__pycache__/train_translation.cpython-39.pyc and /dev/null differ
diff --git a/__pycache__/translation_dataset.cpython-37.pyc b/__pycache__/translation_dataset.cpython-37.pyc
deleted file mode 100644
index 7ac9cd8..0000000
Binary files a/__pycache__/translation_dataset.cpython-37.pyc and /dev/null differ
diff --git a/__pycache__/translation_dataset.cpython-38.pyc b/__pycache__/translation_dataset.cpython-38.pyc
deleted file mode 100644
index 849c726..0000000
Binary files a/__pycache__/translation_dataset.cpython-38.pyc and /dev/null differ
diff --git a/__pycache__/translation_dataset.cpython-39.pyc b/__pycache__/translation_dataset.cpython-39.pyc
deleted file mode 100644
index 5c8b8c5..0000000
Binary files a/__pycache__/translation_dataset.cpython-39.pyc and /dev/null differ
diff --git a/__pycache__/translation_utils.cpython-37.pyc b/__pycache__/translation_utils.cpython-37.pyc
deleted file mode 100644
index 12c22a5..0000000
Binary files a/__pycache__/translation_utils.cpython-37.pyc and /dev/null differ
diff --git a/__pycache__/translation_utils.cpython-38.pyc b/__pycache__/translation_utils.cpython-38.pyc
deleted file mode 100644
index a1e7877..0000000
Binary files a/__pycache__/translation_utils.cpython-38.pyc and /dev/null differ
diff --git a/__pycache__/translation_utils.cpython-39.pyc b/__pycache__/translation_utils.cpython-39.pyc
deleted file mode 100644
index c4cfb7d..0000000
Binary files a/__pycache__/translation_utils.cpython-39.pyc and /dev/null differ
diff --git a/t_dataset.py b/t_dataset.py
index 5767e4c..8b4334e 100644
--- a/t_dataset.py
+++ b/t_dataset.py
@@ -1,3 +1,4 @@
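+# edits: added tokenizer calls pass max_length=512 and truncation=True (padding=True unchanged); added loops break after 501 sentences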
 import torch 
 from datasets import load_dataset
 from transformers import AutoTokenizer 
@@ -20,40 +21,67 @@ def __init__(self,
             split = "train" 
         else: 
             split = "test" 
+        print('getting dataset')
         self.dataset = load_dataset('wmt14', "de-en", split=split) 
         self.de_list = []
         self.en_list = []
 #        self.tokenizer = tokenizer
         self.tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-uncased')
         en_list_2 = []
+        #for k in range(100):#len(self.dataset)):
+        #   n,i = self.dataset[k]
         for n, i in enumerate(self.dataset): 
             en_list_2.append(i['translation']['en'].lower())
-
-        a1 = list(self.tokenizer(en_list_2, padding=True, return_tensors='pt')['input_ids'])
+            #print(n)
+            if n==500:
+                break
+        #print(len(en_list_2))
+        # print(max(en_list_2))
+        token_res = self.tokenizer(en_list_2, padding=True,max_length=512, return_tensors='pt', truncation=True)['input_ids']
+        a1 = list(token_res)
         self.en_vocab, self.en_vocab_size = vocab(a1)
         self.bert2id_dict = translation_utils.bert2id(self.en_vocab)
         self.id2bert_dict = translation_utils.id2bert(self.en_vocab)
         
+
+        for n, i in enumerate(self.dataset): 
+            #if len(i['translation']['de'])> 400:
+            #    print(len(i['translation']['de']))
+
+            #elif len(i['translation']['en'])> 400: 
+            #    print(len(i['translation']['en']))
+            #    print(i['translation']['en'])
+
+            #else: 
+               # print(len(i['translation']['de']))
+            self.de_list.append(self.tokenizer(i['translation']['de'].lower(), padding=True, return_tensors='pt',max_length=512, truncation=True)["input_ids"])
+            self.en_list.append(self.tokenizer(i['translation']['en'].lower(), padding=True, return_tensors='pt',max_length=512, truncation=True)["input_ids"])
+            if n==500:
+                break
+
         for i in self.dataset: 
             self.de_list.append(self.tokenizer(i['translation']['de'].lower(), 
                         padding=True, return_tensors='pt')["input_ids"])
-            self.en_list.append(self.tokenizer(i['translation']['en'].lower(), 
+            self.en_list.append(self.tokenizer(i['translation']['en'].lower(),
                         padding=True, return_tensors='pt')["input_ids"])
-            
         # en_list_id = []
         # for i in self.dataset: 
         #     en_list_id.append(i['translation']['en'].lower())
         de_list_1 = []
         for n,i in enumerate(self.dataset): 
           de_list_1.append(i['translation']['de'].lower())
+          if n==500:
+                break
 
-        a = list(self.tokenizer(de_list_1, padding=True, return_tensors='pt')['input_ids'])
+        a = list(self.tokenizer(de_list_1, padding=True, return_tensors='pt',max_length=512, truncation=True)['input_ids'])
 
         en_list_1 = []
         for n,i in enumerate(self.dataset): 
           en_list_1.append(i['translation']['en'].lower())
+          if n==500:
+              break
 
-        b = list(self.tokenizer(de_list_1, padding=True, return_tensors='pt')['input_ids'])
+        b = list(self.tokenizer(de_list_1, padding=True, max_length=512, return_tensors='pt', truncation=True)['input_ids'])
         # en_vocab, self.en_vocab_size = vocab(b)
         self.de_vocab, self.de_vocab_size = vocab(a) 
             
diff --git a/t_dataset2.py b/t_dataset2.py
new file mode 100644
index 0000000..b7cb015
--- /dev/null
+++ b/t_dataset2.py
@@ -0,0 +1,125 @@
+
+import torch 
+from datasets import load_dataset
+from transformers import AutoTokenizer 
+# from _config import Config as config 
+from torch.nn.utils.rnn import pad_sequence
+from torch.utils.data import DataLoader, Dataset
+
+import translation_utils
+from translation_utils import vocab 
+import os 
+
+
+os.environ['TRANSFORMERS_OFFLINE'] = 'yes' 
+class Translation_dataset_t(Dataset):
+    """WMT14 de-en sentence pairs tokenized with the multilingual BERT tokenizer.
+
+    Indexing yields ``{'src': de_ids, 'trg': en_ids}``: the tokenizer's
+    ``input_ids`` for the lower-cased German and English sentence of one
+    pair, each padded/truncated to ``_MAX_LENGTH`` tokens.
+
+    Attributes set by ``__init__``:
+        dataset: the loaded ``wmt14`` / ``de-en`` split.
+        tokenizer: the ``bert-base-multilingual-uncased`` tokenizer.
+        de_list, en_list: per-pair ``input_ids`` (source / target).
+        en_vocab, en_vocab_size: ``vocab()`` over the first
+            ``_VOCAB_SAMPLE`` English sentences only.
+        bert2id_dict, id2bert_dict: built from ``en_vocab`` by
+            ``translation_utils.bert2id`` / ``translation_utils.id2bert``.
+        de_vocab, de_vocab_size: ``vocab()`` over every German sentence.
+    """
+
+    _MAX_LENGTH = 512    # max_length handed to every tokenizer call
+    _VOCAB_SAMPLE = 501  # English sentences used to build the target vocab
+
+    def __init__(self,
+            train: bool = True):
+        """Load the split, tokenize every pair and build both vocabularies.
+
+        Args:
+            train: load the ``"train"`` split when true, else ``"test"``.
+        """
+        split = "train" if train else "test"
+        print('getting dataset')
+        self.dataset = load_dataset('wmt14', "de-en", split=split)
+        self.tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-uncased')
+
+        # Target vocabulary comes from a fixed-size prefix of the split.
+        en_sample = [pair['translation']['en'].lower()
+                     for _, pair in zip(range(self._VOCAB_SAMPLE), self.dataset)]
+        self.en_vocab, self.en_vocab_size = vocab(list(self._encode(en_sample)))
+        self.bert2id_dict = translation_utils.bert2id(self.en_vocab)
+        self.id2bert_dict = translation_utils.id2bert(self.en_vocab)
+
+        # Single pass over the split: tokenize each pair and collect the German
+        # text for the source vocabulary. Over-long pairs are not skipped;
+        # truncation=True caps every sequence at _MAX_LENGTH tokens.
+        self.de_list = []
+        self.en_list = []
+        de_sentences = []
+        for pair in self.dataset:
+            de_text = pair['translation']['de'].lower()
+            en_text = pair['translation']['en'].lower()
+            self.de_list.append(self._encode(de_text))
+            self.en_list.append(self._encode(en_text))
+            de_sentences.append(de_text)
+        self.de_vocab, self.de_vocab_size = vocab(list(self._encode(de_sentences)))
+
+    def _encode(self, text):
+        """Return ``input_ids`` for ``text`` padded/truncated to ``_MAX_LENGTH``."""
+        encoded = self.tokenizer(text, padding='max_length', max_length=self._MAX_LENGTH,
+                                 return_tensors='pt', truncation=True)
+        return encoded['input_ids']
+
+    def __len__(self):
+        """Return the number of tokenized sentence pairs."""
+        return len(self.de_list)
+
+    def __getitem__(self, index):
+        """Return the pair at ``index`` as ``{'src': de_ids, 'trg': en_ids}``."""
+        return {'src': self.de_list[index], 'trg': self.en_list[index]}
+
+
+
+class MyCollate:
+    """Collate callable that turns ``{'src', 'trg'}`` examples into padded batch tensors."""
+
+    def __init__(self, tokenizer, bert2id_dict: dict):
+        # The tokenizer provides the pad and unk ids; bert2id_dict remaps target token ids.
+        self.tokenizer = tokenizer
+        self.pad_idx = self.tokenizer.convert_tokens_to_ids(self.tokenizer.pad_token)
+        self.bert2id_dict = bert2id_dict
+
+    def _stack(self, batch, key):
+        # Transpose each example's tensor, then pad them to a common first-axis length.
+        columns = [example[key].T for example in batch]
+        return pad_sequence(columns, batch_first=False, padding_value=self.pad_idx)
+
+    def __call__(self, batch):
+        """Return ``(source, target, target_inp, target_out)`` for ``batch``.
+
+        ``target_inp`` is ``target`` without its last first-axis entry; ``target_out``
+        is ``target`` without its first entry, with every id looked up in
+        ``bert2id_dict`` (ids missing there become ``tokenizer.unk_token_id``).
+        """
+        source = self._stack(batch, 'src')
+        target = self._stack(batch, 'trg')
+        target_inp = target.squeeze(-1)[:-1, :]
+
+        target_out = torch.zeros(target.shape)
+        for pos, row in enumerate(target):
+            for col, cell in enumerate(row):
+                try:
+                    target_out[pos][col] = self.bert2id_dict[cell.item()]
+                except KeyError:
+                    target_out[pos][col] = self.tokenizer.unk_token_id
+        target_out = target_out.squeeze(-1)[1:, :]
+        return source.squeeze(), target.squeeze().long(), target_inp.squeeze().long(), target_out.squeeze().long()
+
+
+# dataset = Translation_dataset()
+# loader = DataLoader(dataset=dataset, 
+#                       batch_size= 32, 
+#                       shuffle=False,
+#                       collate_fn=MyCollate())
diff --git a/train_translation.py b/train_translation.py
index 64cda2f..9f5b778 100644
--- a/train_translation.py
+++ b/train_translation.py
@@ -17,6 +17,7 @@
 import t_dataset
 from t_dataset import  Translation_dataset_t
 from t_dataset import  MyCollate
+import translation_dataset
 import translation_utils 
 from translation_utils import TokenEmbedding, PositionalEncoding 
 from translation_utils import create_mask
@@ -149,10 +150,11 @@ def main_worker(gpu, args):
         world_size=args.world_size, rank=args.rank)
 
     if args.rank == 0:
-
+        '''
         wandb.init(config=args, project='translation_test')#############################################
         wandb.config.update(args)
         config = wandb.config
+        '''
     
         # exit()
         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
@@ -163,7 +165,11 @@ def main_worker(gpu, args):
     torch.cuda.set_device(gpu)
     torch.backends.cudnn.benchmark = True
 
+#    print('loading barlow dataset') 
+#    dataset = translation_dataset.Translation_dataset() 
+    print('loading translation dataset') 
     dataset = Translation_dataset_t(train=args.train) 
+    print('dataset loaded')
     src_vocab_size = dataset.de_vocab_size
     trg_vocab_size = dataset.en_vocab_size
     tokenizer = dataset.tokenizer  
@@ -236,10 +242,11 @@ def main_worker(gpu, args):
     per_device_batch_size = args.batch_size // args.world_size
     id2bert_dict = dataset.id2bert_dict
     ###############################
+    print('instantiating dataloader')
     loader = torch.utils.data.DataLoader(
          dataset, batch_size=per_device_batch_size, num_workers=args.workers,
          pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-   
+    print('loaded on cuda')
     test_loader = torch.utils.data.DataLoader(
          dataset, batch_size=1, num_workers=args.workers,
          pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
@@ -283,7 +290,7 @@ def main_worker(gpu, args):
                         print(json.dumps(stats), file=stats_file)
             if args.rank == 0:
 
-                wandb.log({"epoch_loss":epoch_loss/t})
+                #wandb.log({"epoch_loss":epoch_loss/t})
                 # save checkpoint
                 state = dict(epoch=epoch + 1, model=model.module.state_dict(),
                             optimizer=optimizer.state_dict())
@@ -296,7 +303,7 @@ def main_worker(gpu, args):
                 if epoch%args.checkbleu ==0 : 
 
                     bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                    wandb.log({'bleu_score': bleu_score}) 
+                    #wandb.log({'bleu_score': bleu_score}) 
     #            print(bleu_score(predicted, target))
     ##############################################################
     #        if epoch%1 ==0 : 
@@ -309,14 +316,14 @@ def main_worker(gpu, args):
             #                  optimizer=optimizer.state_dict())
             #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
             #     print('saved translation model in', args.checkpoint_dir)
-        wandb.finish()
+        #wandb.finish()
             
     else: 
 
         bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
         print('test_bleu_score', bleu_score)
-        if args.rank == 0: 
-            wandb.log({'bleu_score': bleu_score})
+#        if args.rank == 0: 
+            #wandb.log({'bleu_score': bleu_score})
 
 
 def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
@@ -366,6 +373,10 @@ def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
         memory = memory
         tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
                     .type(torch.bool)).cuda(gpu, non_blocking=True)
+
+        print('ys shape: ', ys.shape) 
+        print('memory.shape', memory.shape) 
+        print('tgt_mask.shape', tgt_mask.shape) 
         out = model.module.decode(ys, memory, tgt_mask)
         out = out.transpose(0, 1)
         prob = model.module.generator(out[:, -1])
@@ -400,4 +411,4 @@ def translate(model: torch.nn.Module,
 
 if __name__ == '__main__': 
     main()
-    wandb.finish()
+    #wandb.finish()
diff --git a/translation_dataset.py b/translation_dataset.py
index 274c2f3..9dec23e 100644
--- a/translation_dataset.py
+++ b/translation_dataset.py
@@ -16,8 +16,16 @@ def __init__(self):
         self.en_list = []
 
         for i in self.dataset: 
-            self.de_list.append(tokenizer(i['translation']['de'].lower(), padding=True, return_tensors='pt')["input_ids"])
-            self.en_list.append(tokenizer(i['translation']['en'].lower(), padding=True, return_tensors='pt')["input_ids"])
+            if len(i['translation']['de'])> 400:
+                #print(len(i['translation']['de']))
+                pass 
+            elif len(i['translation']['en'])> 400: 
+                #print(len(i['translation']['en']))
+                pass
+            else: 
+               # print(len(i['translation']['de']))
+                self.de_list.append(tokenizer(i['translation']['de'].lower(), padding=True, return_tensors='pt')["input_ids"])
+                self.en_list.append(tokenizer(i['translation']['en'].lower(), padding=True, return_tensors='pt')["input_ids"])
             
 
   
diff --git a/translation_utils.py b/translation_utils.py
index af3437a..747b03f 100644
--- a/translation_utils.py
+++ b/translation_utils.py
@@ -88,14 +88,31 @@ def __init__(self, emb_size, mbert):
         super(TokenEmbedding, self).__init__()
         # self.embedding = nn.Embedding(vocab_size, emb_size)
         self.embedding = mbert
-#         for param in self.embedding.parameters():
-#             param.requires_grad = False
-#         for param in self.embedding.pooler.parameters():
-#             param.requires_grad = True
+        for param in self.embedding.parameters():
+            param.requires_grad = False
+        for param in self.embedding.pooler.parameters():
+            param.requires_grad = True
         self.emb_size = emb_size
 
     def forward(self, tokens: torch.tensor):
         # print(tokens.shape)
         if len(tokens.shape) ==1: 
             tokens  = tokens.unsqueeze(-1)
+        # NOTE(review): debug probe -- calls the embedding once and discards the result; a RuntimeError is only printed.
+        try: 
+            self.embedding(tokens.long().T)['last_hidden_state']
+        except RuntimeError: 
+            print('errored')
+        # The embedding is called again here: last_hidden_state is permuted (1, 0, 2) and scaled by sqrt(emb_size).
         return self.embedding(tokens.long().T)['last_hidden_state'].permute(1, 0, 2) * math.sqrt(self.emb_size)
+
+  #      try: 
+
+
+'''
+        except RuntimeError: 
+            print('errored')
+            b = torch.zeros(tokens.shape[0], 1, 768)
+            pass
+
+'''
diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
deleted file mode 120000
index 5c95722..0000000
--- a/wandb/debug-internal.log
+++ /dev/null
@@ -1 +0,0 @@
-run-20220416_014323-1a0lobwa/logs/debug-internal.log
\ No newline at end of file
diff --git a/wandb/debug.log b/wandb/debug.log
deleted file mode 120000
index c54d1ec..0000000
--- a/wandb/debug.log
+++ /dev/null
@@ -1 +0,0 @@
-run-20220416_014323-1a0lobwa/logs/debug.log
\ No newline at end of file
diff --git a/wandb/latest-run b/wandb/latest-run
deleted file mode 120000
index 34b339f..0000000
--- a/wandb/latest-run
+++ /dev/null
@@ -1 +0,0 @@
-run-20220416_014323-1a0lobwa
\ No newline at end of file
diff --git a/wandb/run-20220415_190620-2py0vpvt/files/code/train_translation.py b/wandb/run-20220415_190620-2py0vpvt/files/code/train_translation.py
deleted file mode 100644
index c6ab0ef..0000000
--- a/wandb/run-20220415_190620-2py0vpvt/files/code/train_translation.py
+++ /dev/null
@@ -1,400 +0,0 @@
-import numpy as np
-from pathlib import Path
-import argparse
-import json
-import math
-import os
-import random
-import signal
-import subprocess
-import sys
-import time
-
-import torch
-from torch import nn, optim 
-from torch.nn import Transformer 
-import torchtext
-import t_dataset
-from t_dataset import  Translation_dataset_t
-from t_dataset import  MyCollate
-import translation_utils 
-from translation_utils import TokenEmbedding, PositionalEncoding 
-from translation_utils import create_mask
-from transformers import BertModel 
-from transformers import AutoTokenizer
-from torch import Tensor
-from torchtext.data.metrics import bleu_score
-from models import Translator
-from models import BarlowTwins
-
-import wandb 
-
-
-#import barlow
-os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
-os.environ['WANDB_START_METHOD'] = 'thread'
-os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-
-MANUAL_SEED = 4444
-
-random.seed(MANUAL_SEED)
-np.random.seed(MANUAL_SEED)
-torch.manual_seed(MANUAL_SEED)
-torch.backends.cudnn.deterministic = True
-
-
-parser = argparse.ArgumentParser(description = 'Translation') 
-
-# Training hyper-parameters: 
-parser.add_argument('--workers', default=4, type=int, metavar='N', 
-                    help='number of data loader workers') 
-parser.add_argument('--epochs', default=5, type=int, metavar='N',
-                    help='number of total epochs to run')
-parser.add_argument('--batch_size', default=4, type=int, metavar='n',
-                    help='mini-batch size')
-parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
-                    help='base learning rate')
-parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
-                    help='dropout for training translation transformer')
-parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
-                    help='weight decay')
-parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
-                    help='momentum for sgd')
-parser.add_argument('--clip', default=1, type=float, metavar='GC',
-                    help='Gradient Clipping')
-parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
-                    help='betas for Adam Optimizer')
-parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
-                    help='eps for Adam optimizer')
-parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
-                    help='loss function for translation')
-parser.add_argument('--optimizer', default='adam', type=str, metavar='OP',
-                    help='selecting optimizer')
-
-# Transformer parameters: 
-parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
-                    help='dimension of transformer encoder')
-parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                    help= 'number of heads in transformer') 
-parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                    help= 'dimension of feedforward layer in transformer encoder') 
-parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                   help='number of layers of transformer encoder') 
-parser.add_argument('--projector', default='768-256', type=str,
-                    metavar='MLP', help='projector MLP')
-
-# Tokenizer: 
-parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
-                metavar='T', help= 'tokenizer')
-parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
-                    help='Dimension of mbert output')
-# Paths: 
-parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
-                    metavar='DIR', help='path to checkpoint directory')
-
-# to load or barlow or not: 
-parser.add_argument('--load', default=0, type=int,
-                    metavar='DIR', help='to load barlow twins encoder or not')
-
-# calculate bleu: 
-parser.add_argument('--checkbleu', default=5 , type=int,
-                    metavar='BL', help='check bleu after these number of epochs')
-# train or test dataset
-parser.add_argument('--train', default=True , type=bool,
-                    metavar='T', help='selecting train set')
-
-parser.add_argument('--print_freq', default=5 , type=int,
-                    metavar='PF', help='frequency of printing and saving stats')
-
-parser.add_argument('--test_translation', default=0, type=int, 
-                    metavar='TT', help='testing translation_score')
-''' NOTE: 
-        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-'''
-
-args = parser.parse_args()
-# print(args.load)
-os.environ["TOKENIZERS_PARALLELISM"] = "true"
-
-def main(): 
-
-    # print("entered main")
-    args.ngpus_per_node = torch.cuda.device_count()
-    if 'SLURM_JOB_ID' in os.environ:
-        # single-node and multi-node distributed training on SLURM cluster
-        # requeue job on SLURM preemption
-        signal.signal(signal.SIGUSR1, handle_sigusr1)
-        signal.signal(signal.SIGTERM, handle_sigterm)
-        # find a common host name on all nodes
-        # assume scontrol returns hosts in the same order on all nodes
-        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
-        stdout = subprocess.check_output(cmd.split())
-        host_name = stdout.decode().splitlines()[0]
-        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
-        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
-        args.dist_url = f'tcp://{host_name}:58472'
-    else:
-        # single-node distributed training
-        args.rank = 0
-        args.dist_url = 'tcp://localhost:58472'
-        args.world_size = args.ngpus_per_node
-    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
-
-
-def main_worker(gpu, args):
-    
-    args.rank += gpu
-    torch.distributed.init_process_group(
-        backend='nccl', init_method=args.dist_url,
-        world_size=args.world_size, rank=args.rank)
-
-    if args.rank == 0:
-
-        wandb.init(config=args, project='translation_test')#############################################
-        wandb.config.update(args)
-        config = wandb.config
-    
-        # exit()
-        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
-        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
-        print(' '.join(sys.argv))
-        print(' '.join(sys.argv), file=stats_file)
-
-    torch.cuda.set_device(gpu)
-    torch.backends.cudnn.benchmark = True
-
-    dataset = Translation_dataset_t(train=args.train) 
-    src_vocab_size = dataset.de_vocab_size
-    trg_vocab_size = dataset.en_vocab_size
-    tokenizer = dataset.tokenizer  
-    pad_idx = tokenizer.pad_token_id
-    sos_idx = tokenizer.cls_token_id 
-    eos_idx = tokenizer.sep_token_id
-
-#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
-    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
-    # print(src_vocab_size, trg_vocab_size)
-    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
-    transformer = Transformer(d_model=args.dmodel, 
-                              nhead=args.nhead, 
-                              num_encoder_layers=args.nlayers, 
-                              num_decoder_layers = args.nlayers, 
-                              dim_feedforward=args.dfeedforward, 
-                              dropout=args.dropout)
-    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
-    # print(model.state_dict)
-#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
-
-    # args.load = False
-
-    if args.load == 1: 
-        # print(args.load)
-        # print('inside')
-        print('loading barlow model')
-        t_enc = model.transformer.encoder
-        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
-        ### note: lambd is just a placeholder
-        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
-                            map_location='cpu')
-        barlow.load_state_dict(ckpt['model'])
-        model.transformer.encoder = barlow.transformer_enc        
-        model.mbert = barlow.mbert
-    '''
-    to_do: 
-    if post_train: 
-        torch.load(model.states_dict)
-        model.transformer.encoder = model_barlow
-
-    '''
-#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
-
-    param_weights = []
-    param_biases = []
-    for param in model.parameters():
-        if param.ndim == 1:
-            param_biases.append(param)
-        else:
-            param_weights.append(param)
-    parameters = [{'params': param_weights}, {'params': param_biases}]
-    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
-
-###########################################################
-    if args.optimizer == 'adam':
-        optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
-    else: 
-        optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) 
-    
-    if args.loss_fn == 'cross_entropy': 
-        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
-##############################################################
-
-    start_epoch = 0 
-
-    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
-
-    assert args.batch_size % args.world_size == 0
-    per_device_batch_size = args.batch_size // args.world_size
-    id2bert_dict = dataset.id2bert_dict
-    ###############################
-    loader = torch.utils.data.DataLoader(
-         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-   
-    test_loader = torch.utils.data.DataLoader(
-         dataset, batch_size=1, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-    #############################
-    start_time = time.time()
-
-
-    if not args.test_translation: 
-
-        for epoch in range(start_epoch, args.epochs):
-            sampler.set_epoch(epoch)
-            epoch_loss = 0 
-            t = 0 
-            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
-                src = sent[0].cuda(gpu, non_blocking=True)
-                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
-                tgt_out = sent[3].cuda(gpu, non_blocking=True)
-                
-                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
-                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
-                
-                optimizer.zero_grad()
-
-                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
-                loss.backward()
-
-                optimizer.step()
-                # losses += loss.item()
-                
-                # wandb.log({'iter_loss': loss})
-                epoch_loss += loss.item()
-                t += 1 
-                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-                
-                if step % args.print_freq == 0:
-                    if args.rank == 0:
-                        stats = dict(epoch=epoch, step=step,
-                                    loss=loss.item(),
-                                    time=int(time.time() - start_time))
-                        print(json.dumps(stats))
-                        print(json.dumps(stats), file=stats_file)
-            if args.rank == 0:
-
-                wandb.log({"epoch_loss":epoch_loss/t})
-                # save checkpoint
-                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
-                            optimizer=optimizer.state_dict())
-                # print(model.state_dict)
-                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
-                print('translation model saved in', args.checkpoint_dir)
-            
-    ##############################################################
-            if args.rank == 0: 
-                if epoch%args.checkbleu ==0 : 
-
-                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                    wandb.log({'bleu_score': bleu_score}) 
-    #            print(bleu_score(predicted, target))
-    ##############################################################
-    #        if epoch%1 ==0 : 
-    #            torch.save(model.module.state_dict(),
-    #                   'path.pth')
-    #            print("Model is saved")
-            # if args.rank == 0:
-            #     # save checkpoint
-            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
-            #                  optimizer=optimizer.state_dict())
-            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
-            #     print('saved translation model in', args.checkpoint_dir)
-        wandb.finish()
-            
-    else: 
-
-        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-        print('test_bleu_score', bleu_score)
-        if args.rank == 0: 
-            wandb.log({'bleu_score': bleu_score})
-
-
-def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
-
-    model.eval()
-    predicted=[]
-    target=[]
-            
-    for i in test_loader: 
-        src = i[0].cuda(gpu, non_blocking=True)
-        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-        num_tokens = src.shape[0]
-
-        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
-        out = translate(model, src, tokenizer, src_mask, id2bert, gpu)
-        predicted.append(out)
-        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-        print(out)
-        print(tokenizer.convert_ids_to_tokens(tgt_out))
-                
-        try: 
-            bleu_score(predicted, target)
-        except: 
-            predicted.pop()
-            target.pop()
-            
-        bleu = bleu_score(predicted, target)
-
-    return bleu
-
-'''
-todo: 
-    BLEU score
-'''
-
-# function to generate output sequence using greedy algorithm 
-def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
-    src = src
-    src_mask = src_mask
-
-    memory = model.module.encode(src, src_mask)
-    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
-    for i in range(max_len-1):
-        memory = memory
-        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
-                    .type(torch.bool)).cuda(gpu, non_blocking=True)
-        out = model.module.decode(ys, memory, tgt_mask)
-        out = out.transpose(0, 1)
-        prob = model.module.generator(out[:, -1])
-        _, next_word = torch.max(prob, dim=1)
-        next_word = next_word.item()
-
-        ys = torch.cat([ys,
-                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
-        if next_word == eos_idx:
-            break
-    return ys
-
-
-# actual function to translate input sentence into target language
-def translate(model: torch.nn.Module, 
-        src: torch.tensor, 
-        tokenizer,src_mask, id2bert, gpu):
-    model.eval()
-    
-    num_tokens = src.shape[0]
-    
-    
-    tgt_tokens = greedy_decode(
-        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-    
-#    for i in len(tgt_tokens): 
-#        tgt_tokens[i] = id2bert[tgt_tokens[i]]
-#    print(tgt_tokens)
-
-    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
-
-
-if __name__ == '__main__': 
-    main()
-    wandb.finish()
diff --git a/wandb/run-20220415_190620-2py0vpvt/files/conda-environment.yaml b/wandb/run-20220415_190620-2py0vpvt/files/conda-environment.yaml
deleted file mode 100644
index fd74d2b..0000000
--- a/wandb/run-20220415_190620-2py0vpvt/files/conda-environment.yaml
+++ /dev/null
@@ -1,158 +0,0 @@
-name: ectc
-channels:
-  - pytorch
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=4.5=1_gnu
-  - blas=1.0=mkl
-  - brotlipy=0.7.0=py37h27cfd23_1003
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2022.3.18=h06a4308_0
-  - certifi=2021.10.8=py37h06a4308_2
-  - cffi=1.15.0=py37hd667e15_1
-  - cryptography=36.0.0=py37h9ce1e76_0
-  - cudatoolkit=11.3.1=h2bc3f7f_2
-  - ffmpeg=4.3=hf484d3e_0
-  - freetype=2.11.0=h70c0345_0
-  - giflib=5.2.1=h7b6447c_0
-  - gmp=6.2.1=h2531618_2
-  - gnutls=3.6.15=he1e5248_0
-  - idna=3.3=pyhd3eb1b0_0
-  - intel-openmp=2021.4.0=h06a4308_3561
-  - jpeg=9d=h7f8727e_0
-  - lame=3.100=h7b6447c_0
-  - lcms2=2.12=h3be6417_0
-  - ld_impl_linux-64=2.35.1=h7274673_9
-  - libffi=3.3=he6710b0_2
-  - libgcc-ng=9.3.0=h5101ec6_17
-  - libgomp=9.3.0=h5101ec6_17
-  - libiconv=1.15=h63c8f33_5
-  - libidn2=2.3.2=h7f8727e_0
-  - libpng=1.6.37=hbc83047_0
-  - libstdcxx-ng=9.3.0=hd4cf53a_17
-  - libtasn1=4.16.0=h27cfd23_0
-  - libtiff=4.2.0=h85742a9_0
-  - libunistring=0.9.10=h27cfd23_0
-  - libuv=1.40.0=h7b6447c_0
-  - libwebp=1.2.2=h55f646e_0
-  - libwebp-base=1.2.2=h7f8727e_0
-  - lz4-c=1.9.3=h295c915_1
-  - mkl=2021.4.0=h06a4308_640
-  - mkl-service=2.4.0=py37h7f8727e_0
-  - mkl_fft=1.3.1=py37hd3c417c_0
-  - mkl_random=1.2.2=py37h51133e4_0
-  - ncurses=6.3=h7f8727e_2
-  - nettle=3.7.3=hbbd107a_1
-  - numpy-base=1.21.2=py37h79a1101_0
-  - openh264=2.1.1=h4ff587b_0
-  - openssl=1.1.1n=h7f8727e_0
-  - pip=21.2.2=py37h06a4308_0
-  - pycparser=2.21=pyhd3eb1b0_0
-  - pyopenssl=22.0.0=pyhd3eb1b0_0
-  - pysocks=1.7.1=py37_1
-  - python=3.7.11=h12debd9_0
-  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
-  - pytorch-mutex=1.0=cuda
-  - readline=8.1.2=h7f8727e_1
-  - requests=2.27.1=pyhd3eb1b0_0
-  - setuptools=58.0.4=py37h06a4308_0
-  - six=1.16.0=pyhd3eb1b0_1
-  - sqlite=3.38.0=hc218d9a_0
-  - tk=8.6.11=h1ccaba5_0
-  - torchaudio=0.11.0=py37_cu113
-  - typing_extensions=4.1.1=pyh06a4308_0
-  - wheel=0.37.1=pyhd3eb1b0_0
-  - xz=5.2.5=h7b6447c_0
-  - zlib=1.2.11=h7f8727e_4
-  - zstd=1.4.9=haebb681_0
-  - pip:
-    - aiohttp==3.8.1
-    - aiosignal==1.2.0
-    - antlr4-python3-runtime==4.8
-    - async-timeout==4.0.2
-    - asynctest==0.13.0
-    - attrs==21.4.0
-    - backcall==0.2.0
-    - bitarray==2.4.1
-    - blessings==1.7
-    - charset-normalizer==2.0.12
-    - click==8.0.4
-    - colorama==0.4.4
-    - configparser==5.2.0
-    - cython==0.29.28
-    - datasets==1.16.1
-    - debugpy==1.6.0
-    - decorator==5.1.1
-    - dill==0.3.4
-    - docker-pycreds==0.4.0
-    - entrypoints==0.4
-    - fastbpe==0.1.0
-    - filelock==3.6.0
-    - frozenlist==1.3.0
-    - fsspec==2022.2.0
-    - gitdb==4.0.9
-    - gitpython==3.1.27
-    - gpustat==0.6.0
-    - huggingface-hub==0.4.0
-    - hydra-core==1.0.7
-    - importlib-metadata==4.11.3
-    - importlib-resources==5.6.0
-    - ipykernel==6.12.1
-    - ipython==7.32.0
-    - jedi==0.18.1
-    - joblib==1.1.0
-    - jupyter-client==7.2.2
-    - jupyter-core==4.9.2
-    - matplotlib-inline==0.1.3
-    - mock==4.0.3
-    - multidict==6.0.2
-    - multiprocess==0.70.12.2
-    - nest-asyncio==1.5.5
-    - numpy==1.21.5
-    - nvidia-ml-py3==7.352.0
-    - omegaconf==2.0.6
-    - packaging==21.3
-    - pandas==1.3.5
-    - parso==0.8.3
-    - pathtools==0.1.2
-    - pexpect==4.8.0
-    - pickleshare==0.7.5
-    - pillow==9.0.1
-    - portalocker==2.4.0
-    - promise==2.3
-    - prompt-toolkit==3.0.29
-    - protobuf==3.19.4
-    - psutil==5.9.0
-    - ptyprocess==0.7.0
-    - pyarrow==7.0.0
-    - pygments==2.11.2
-    - pyparsing==3.0.7
-    - python-dateutil==2.8.2
-    - pytz==2022.1
-    - pyyaml==6.0
-    - pyzmq==22.3.0
-    - regex==2022.3.15
-    - sacrebleu==2.0.0
-    - sacremoses==0.0.49
-    - sentry-sdk==1.5.8
-    - shortuuid==1.0.8
-    - smmap==5.0.0
-    - subprocess32==3.5.4
-    - subword-nmt==0.3.8
-    - tabulate==0.8.9
-    - tokenizers==0.10.3
-    - torch==1.11.0
-    - torchtext==0.12.0
-    - torchvision==0.9.1
-    - tornado==6.1
-    - tqdm==4.63.1
-    - traitlets==5.1.1
-    - transformers==4.14.1
-    - urllib3==1.26.9
-    - wandb==0.10.31
-    - wcwidth==0.2.5
-    - xxhash==3.0.0
-    - yarl==1.7.2
-    - zipp==3.7.0
-prefix: /home/ivlabs/miniconda3/envs/ectc
diff --git a/wandb/run-20220415_190620-2py0vpvt/files/config.yaml b/wandb/run-20220415_190620-2py0vpvt/files/config.yaml
deleted file mode 100644
index b88038a..0000000
--- a/wandb/run-20220415_190620-2py0vpvt/files/config.yaml
+++ /dev/null
@@ -1,110 +0,0 @@
-wandb_version: 1
-
-_wandb:
-  desc: null
-  value:
-    cli_version: 0.10.31
-    code_path: code/train_translation.py
-    framework: huggingface
-    huggingface_version: 4.14.1
-    is_jupyter_run: false
-    is_kaggle_kernel: false
-    python_version: 3.7.11
-    t:
-      1:
-      - 1
-      - 11
-      4: 3.7.11
-      5: 0.10.31
-      6: 4.14.1
-      8:
-      - 8
-batch_size:
-  desc: null
-  value: 4
-betas:
-  desc: null
-  value:
-  - 0.9
-  - 0.98
-checkbleu:
-  desc: null
-  value: 5
-checkpoint_dir:
-  desc: null
-  value: checkpoint
-clip:
-  desc: null
-  value: 1
-dfeedforward:
-  desc: null
-  value: 200
-dist_url:
-  desc: null
-  value: tcp://localhost:58472
-dmodel:
-  desc: null
-  value: 768
-dropout:
-  desc: null
-  value: 0.01
-epochs:
-  desc: null
-  value: 5
-eps:
-  desc: null
-  value: 1.0e-09
-learning_rate:
-  desc: null
-  value: 0.2
-load:
-  desc: null
-  value: 0
-loss_fn:
-  desc: null
-  value: cross_entropy
-mbert_out_size:
-  desc: null
-  value: 768
-momentum:
-  desc: null
-  value: 0.9
-ngpus_per_node:
-  desc: null
-  value: 1
-nhead:
-  desc: null
-  value: 4
-nlayers:
-  desc: null
-  value: 3
-optimizer:
-  desc: null
-  value: adam
-print_freq:
-  desc: null
-  value: 5
-projector:
-  desc: null
-  value: 768-256
-rank:
-  desc: null
-  value: 0
-test_translation:
-  desc: null
-  value: 0
-tokenizer:
-  desc: null
-  value: bert-base-multilingual-uncased
-train:
-  desc: null
-  value: true
-weight_decay:
-  desc: null
-  value: 1.0e-06
-workers:
-  desc: null
-  value: 4
-world_size:
-  desc: null
-  value: 1
diff --git a/wandb/run-20220415_190620-2py0vpvt/files/diff.patch b/wandb/run-20220415_190620-2py0vpvt/files/diff.patch
deleted file mode 100644
index 0634eb7..0000000
--- a/wandb/run-20220415_190620-2py0vpvt/files/diff.patch
+++ /dev/null
@@ -1,30635 +0,0 @@
-diff --git a/__pycache__/barlow_utils.cpython-37.pyc b/__pycache__/barlow_utils.cpython-37.pyc
-index 3c0d4fe..b13b62f 100644
-Binary files a/__pycache__/barlow_utils.cpython-37.pyc and b/__pycache__/barlow_utils.cpython-37.pyc differ
-diff --git a/__pycache__/models.cpython-37.pyc b/__pycache__/models.cpython-37.pyc
-index 3bbb9de..acc1737 100644
-Binary files a/__pycache__/models.cpython-37.pyc and b/__pycache__/models.cpython-37.pyc differ
-diff --git a/__pycache__/t_dataset.cpython-37.pyc b/__pycache__/t_dataset.cpython-37.pyc
-index 2650733..c4b566b 100644
-Binary files a/__pycache__/t_dataset.cpython-37.pyc and b/__pycache__/t_dataset.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-37.pyc b/__pycache__/translation_utils.cpython-37.pyc
-index 60c9eda..12c22a5 100644
-Binary files a/__pycache__/translation_utils.cpython-37.pyc and b/__pycache__/translation_utils.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-38.pyc b/__pycache__/translation_utils.cpython-38.pyc
-index 061d0e7..a1e7877 100644
-Binary files a/__pycache__/translation_utils.cpython-38.pyc and b/__pycache__/translation_utils.cpython-38.pyc differ
-diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
-index 884dd9c..f232b40 100644
---- a/checkpoint/stats.txt
-+++ b/checkpoint/stats.txt
-@@ -833,3 +833,51 @@ train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 -
- {"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
- {"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
- {"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 4}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 5}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 5}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 6}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 7}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 7}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 8}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 8}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 9}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 8}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 65}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 178}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 15}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 72}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 128}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 183}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 239}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 295}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 351}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 407}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 463}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 19}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 104}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 188}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 355}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 606}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 690}
-diff --git a/t_dataset.py b/t_dataset.py
-index c7ab181..53d5caa 100644
---- a/t_dataset.py
-+++ b/t_dataset.py
-@@ -20,19 +20,19 @@ class Translation_dataset_t(Dataset):
-             split = "train" 
-         else: 
-             split = "test" 
--        self.dataset = load_dataset('wmt14', "de-en", split=split) 
-+        self.dataset = load_dataset('opus_rf', "de-en", split=split) 
-         self.de_list = []
-         self.en_list = []
- #        self.tokenizer = tokenizer
-         self.tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-uncased')
--        dataset = load_dataset('opus_rf', 'de-en', split='train')
-         en_list_2 = []
--        for n, i in enumerate(dataset): 
-+        for n, i in enumerate(self.dataset): 
-             en_list_2.append(i['translation']['en'].lower())
- 
-         a1 = list(self.tokenizer(en_list_2, padding=True, return_tensors='pt')['input_ids'])
-         self.en_vocab, self.en_vocab_size = vocab(a1)
-         self.bert2id_dict = translation_utils.bert2id(self.en_vocab)
-+        self.id2bert_dict = translation_utils.id2bert(self.en_vocab)
-         
-         for i in self.dataset: 
-             self.de_list.append(self.tokenizer(i['translation']['de'].lower(), 
-diff --git a/train_translation.py b/train_translation.py
-index eea074a..c6ab0ef 100644
---- a/train_translation.py
-+++ b/train_translation.py
-@@ -33,6 +33,7 @@ import wandb
- #import barlow
- os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
- os.environ['WANDB_START_METHOD'] = 'thread'
-+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
- 
- MANUAL_SEED = 4444
- 
-@@ -75,9 +76,9 @@ parser.add_argument('--dmodel', default=768, type=int, metavar='T',
-                     help='dimension of transformer encoder')
- parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                     help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=500, type=int, metavar='F', 
-+parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                     help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=8, type=int, metavar= 'N', 
-+parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                    help='number of layers of transformer encoder') 
- parser.add_argument('--projector', default='768-256', type=str,
-                     metavar='MLP', help='projector MLP')
-@@ -233,6 +234,7 @@ def main_worker(gpu, args):
- 
-     assert args.batch_size % args.world_size == 0
-     per_device_batch_size = args.batch_size // args.world_size
-+    id2bert_dict = dataset.id2bert_dict
-     ###############################
-     loader = torch.utils.data.DataLoader(
-          dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-@@ -293,7 +295,7 @@ def main_worker(gpu, args):
-             if args.rank == 0: 
-                 if epoch%args.checkbleu ==0 : 
- 
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
-+                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                     wandb.log({'bleu_score': bleu_score}) 
-     #            print(bleu_score(predicted, target))
-     ##############################################################
-@@ -311,13 +313,13 @@ def main_worker(gpu, args):
-             
-     else: 
- 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
-+        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-         print('test_bleu_score', bleu_score)
-         if args.rank == 0: 
-             wandb.log({'bleu_score': bleu_score})
- 
- 
--def checkbleu(model, tokenizer, test_loader, gpu): 
-+def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
- 
-     model.eval()
-     predicted=[]
-@@ -325,13 +327,15 @@ def checkbleu(model, tokenizer, test_loader, gpu):
-             
-     for i in test_loader: 
-         src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
-+        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-         num_tokens = src.shape[0]
- 
-         src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
-+        out = translate(model, src, tokenizer, src_mask, id2bert, gpu)
-         predicted.append(out)
-         target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-+        print(out)
-+        print(tokenizer.convert_ids_to_tokens(tgt_out))
-                 
-         try: 
-             bleu_score(predicted, target)
-@@ -375,7 +379,7 @@ def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
- # actual function to translate input sentence into target language
- def translate(model: torch.nn.Module, 
-         src: torch.tensor, 
--        tokenizer,src_mask, gpu):
-+        tokenizer,src_mask, id2bert, gpu):
-     model.eval()
-     
-     num_tokens = src.shape[0]
-@@ -383,6 +387,11 @@ def translate(model: torch.nn.Module,
-     
-     tgt_tokens = greedy_decode(
-         model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-+    
-+#    for i in len(tgt_tokens): 
-+#        tgt_tokens[i] = id2bert[tgt_tokens[i]]
-+#    print(tgt_tokens)
-+
-     return tokenizer.convert_ids_to_tokens(tgt_tokens) 
- 
- 
-diff --git a/translation_dataset.py b/translation_dataset.py
-index 274c2f3..82270c6 100644
---- a/translation_dataset.py
-+++ b/translation_dataset.py
-@@ -11,7 +11,7 @@ class Translation_dataset(Dataset):
-     
-     def __init__(self):
-       
--        self.dataset = load_dataset('wmt14', "de-en", split="train") 
-+        self.dataset = load_dataset('opus_rf', "de-en", split="train") 
-         self.de_list = []
-         self.en_list = []
- 
-diff --git a/translation_utils.py b/translation_utils.py
-index 6c66f53..4b3b830 100644
---- a/translation_utils.py
-+++ b/translation_utils.py
-@@ -31,6 +31,13 @@ def bert2id(de_list: set):
-     
-     return label_dict
- 
-+def id2bert(de_list: set): 
-+    label_dict = {}
-+    for n, i in enumerate(de_list): 
-+        label_dict[n] = i
-+    
-+    return label_dict
-+
- def generate_square_subsequent_mask(sz):
-     mask = (torch.triu(torch.ones((sz, sz))) == 1).transpose(0, 1)
-     mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
-@@ -81,10 +88,10 @@ class TokenEmbedding(nn.Module):
-         super(TokenEmbedding, self).__init__()
-         # self.embedding = nn.Embedding(vocab_size, emb_size)
-         self.embedding = mbert
--#         for param in self.embedding.parameters():
--#             param.requires_grad = False
--#         for param in self.embedding.pooler.parameters():
--#             param.requires_grad = True
-+        for param in self.embedding.parameters():
-+            param.requires_grad = False
-+        for param in self.embedding.pooler.parameters():
-+            param.requires_grad = True
-         self.emb_size = emb_size
- 
-     def forward(self, tokens: torch.tensor):
-diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
-index 6163657..40790bc 120000
---- a/wandb/debug-internal.log
-+++ b/wandb/debug-internal.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug-internal.log
-\ No newline at end of file
-+run-20220415_190620-2py0vpvt/logs/debug-internal.log
-\ No newline at end of file
-diff --git a/wandb/debug.log b/wandb/debug.log
-index 7d0f5dd..6613878 120000
---- a/wandb/debug.log
-+++ b/wandb/debug.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug.log
-\ No newline at end of file
-+run-20220415_190620-2py0vpvt/logs/debug.log
-\ No newline at end of file
-diff --git a/wandb/latest-run b/wandb/latest-run
-index f11d588..1188b40 120000
---- a/wandb/latest-run
-+++ b/wandb/latest-run
-@@ -1 +1 @@
--run-20220409_182749-paufev36
-\ No newline at end of file
-+run-20220415_190620-2py0vpvt
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py b/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-deleted file mode 100644
-index 9236ace..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-+++ /dev/null
-@@ -1,350 +0,0 @@
--# Copyright (c) Facebook, Inc. and its affiliates.
--# All rights reserved.
--#
--# This source code is licensed under the license found in the
--# LICENSE file in the root directory of this source tree.
--
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--from translation_dataset import Translation_dataset
--from translation_dataset import MyCollate
--from transformers import BertModel
--from transformers import AutoTokenizer
--from torch import nn, optim
--import torch
--from t_dataset import Translation_dataset_t
--from torch.nn import Transformer
--from models import BarlowTwins
--from models import Translator
--from barlow_utils import off_diagonal 
--import wandb 
--#from _config import Config 
--#config = Config.config
--
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--#setting random seeds
--SEED = 4444
--
--random.seed(SEED)
--np.random.seed(SEED)
--torch.manual_seed(SEED)
--torch.cuda.manual_seed(SEED)
--torch.backends.cudnn.deterministic = True
--
--
--
--
--parser = argparse.ArgumentParser(description='Barlow Twins Training')
--# parser.add_batch_sizeargument('data', type=Path, metavar='DIR',
--#                     help='path to dataset')
--
--
--
--# Training parameters: 
--parser.add_argument('--workers', default=20, type=int, metavar='N',
--                    help='number of data loader workers')
--parser.add_argument('--epochs', default=2, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=64, type=int, metavar='N',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate-weights', default=0.2, type=float, metavar='LR',
--                    help='base learning rate for weights')
--parser.add_argument('--learning-rate-biases', default=0.0048, type=float, metavar='LR',
--                 help='base learning rate for biases and batch norm parameters')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--lambd', default=0.0051, type=float, metavar='L',
--                    help='weight on off-diagonal terms')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--
--# Model parameters:
--parser.add_argument('--projector', default='768-768', type=str,
--                    metavar='MLP', help='projector MLP')
--parser.add_argument('--print-freq', default=100, type=int, metavar='N',
--                    help='print frequency')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=3, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--dropout', default=0.0051, type=float, metavar= 'D', 
--                   help='dropout in transformer') 
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-cased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint-dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--parser.add_argument('--load', default=1, type=int,
--                    metavar='LO', help='load weights from translation model')
--
--args = parser.parse_args()
--
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main():
--
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--        wandb.init(config=args)#############################################
--        # wandb.config.update(args)
--        config = wandb.config
--        # print(args.lambd, config.lambd)
--        # wandb.finish()
--        # exibatch_sizet()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=False)
--    t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    mbert = BertModel.from_pretrained(args.tokenizer)
--    model = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=args.lambd).cuda(gpu)
--    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--    optimizer = LARS(parameters, lr=0, weight_decay=args.weight_decay,
--                     weight_decay_filter=True,
--                     lars_adaptation_filter=True)
--    # optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
--
--    # automatically resume from checkpoint if it exists
--    # if (args.checkpoint_dir / 'checkpoint.pth').is_file():
--    #     ckpt = torch.load(args.checkpoint_dir / 'checkpoint.pth',
--    #                       map_location='cpu')
--    #     start_epoch = ckpt['epoch']
--    #     # print("model=",model)
--    #     # print("ckpt=",ckpt['model'])
--    #     model.load_state_dict(ckpt['model'])
--    #     optimizer.load_state_dict(ckpt['optimizer'])
--    # else:
--
--    trans_dataset = Translation_dataset_t(train=True)
--    src_vocab_size = trans_dataset.de_vocab_size 
--    tgt_vocab_size = trans_dataset.en_vocab_size
--    tokenizer = trans_dataset.tokenizer
--    transformer = Transformer(d_model=args.dmodel, 
--                                   nhead=args.nhead, 
--                                   num_encoder_layers=args.nlayers,
--                                   num_decoder_layers=args.nlayers, 
--                                   dim_feedforward=args.dfeedforward, 
--                                   dropout=args.dropout)
--    print(args.batch_size)
--    translation_model = Translator(mbert, 
--            transformer,
--            tgt_vocab_size=tgt_vocab_size,
--            emb_size=args.mbert_out_size)
--    
--    if args.load == 1 : 
--        print('loading translation model')
--        ckpt = torch.load(args.checkpoint_dir / 'translation_checkpoint.pth') #,map_location='cpu')
--        translation_model.load_state_dict(ckpt['model'])
--        model.transformer_enc = translation_model.transformer.encoder
--        model.mbert = translation_model.tok_emb.embedding
--        
--    start_epoch = 0
--
--
--    ################################
--    # dataset = torchvision.datasets.ImageFolder(args.data / 'train', Transform())
--    # sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--    ###############################
--
--    dataset = Translation_dataset()
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    #############################
--    start_time = time.time()
--    scaler = torch.cuda.amp.GradScaler()
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            y1 = sent[0].cuda(gpu, non_blocking=True)
--            y2 = sent[1].cuda(gpu, non_blocking=True)
--            adjust_learning_rate(args, optimizer, loader, step)
--            optimizer.zero_grad()
--            with torch.cuda.amp.autocast(): 
--                _, loss = model.forward(y1, y2)
--                wandb.log({'iter_loss':loss})
--#               print(loss.item())
--                epoch_loss += loss.item()
--            scaler.scale(loss).backward()
--            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
--            scaler.step(optimizer)
--            scaler.update()
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 lr_weights=optimizer.param_groups[0]['lr'],
--                                 lr_biases=optimizer.param_groups[1]['lr'],
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.state_dict(),
--                         optimizer=optimizer.state_dict())
--            torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--            print('barlow model saved in', args.checkpoint_dir)
--            for sent in test_loader: 
--                y1 = sent[0].cuda(gpu, non_blocking=True)
--                y2 = sent[1].cuda(gpu, non_blocking=True)
--                model.eval()
--                c, _ = model(y1, y2)
--                xlabels = tokenizer.convert_ids_to_tokens(y2)
--                ylabels = tokenizer.convert_ids_to_tokens(y1)
--    wandb.finish()
--#    if args.rank == 0:
--#        save final model
--#        torch.save(model.module.state_dict(),
--#                    args.checkpoint_dir / 'translation.pth')
--
--
--def adjust_learning_rate(args, optimizer, loader, step):
--    max_steps = args.epochs * len(loader)
--    warmup_steps = 10 * len(loader)
--    base_lr = args.batch_size / 256
--    if step < warmup_steps:
--        lr = base_lr * step / warmup_steps
--    else:
--        step -= warmup_steps
--        max_steps -= warmup_steps
--        q = 0.5 * (1 + math.cos(math.pi * step / max_steps))
--        end_lr = base_lr * 0.001
--        lr = base_lr * q + end_lr * (1 - q)
--    optimizer.param_groups[0]['lr'] = lr * args.learning_rate_weights
--    optimizer.param_groups[1]['lr'] = lr * args.learning_rate_biases
--
--
--def handle_sigusr1(signum, frame):
--    os.system(f'scontrol requeue {os.getenv("SLURM_JOB_ID")}')
--    exit()
--
--
--def handle_sigterm(signum, frame):
--    pass
--
--
--class LARS(optim.Optimizer):
--    def __init__(self, params, lr, weight_decay=0, momentum=0.9, eta=0.001,
--                 weight_decay_filter=False, lars_adaptation_filter=False):
--        defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum,
--                        eta=eta, weight_decay_filter=weight_decay_filter,
--                        lars_adaptation_filter=lars_adaptation_filter)
--        super().__init__(params, defaults)
--
--
--    def exclude_bias_and_norm(self, p):
--        return p.ndim == 1
--
--    @torch.no_grad()
--    def step(self):
--        for g in self.param_groups:
--            for p in g['params']:
--                dp = p.grad
--
--                if dp is None:
--                    continue
--
--                if not g['weight_decay_filter'] or not self.exclude_bias_and_norm(p):
--                    dp = dp.add(p, alpha=g['weight_decay'])
--
--                if not g['lars_adaptation_filter'] or not self.exclude_bias_and_norm(p):
--                    param_norm = torch.norm(p)
--                    update_norm = torch.norm(dp)
--                    one = torch.ones_like(param_norm)
--                    q = torch.where(param_norm > 0.,
--                                    torch.where(update_norm > 0,
--                                                (g['eta'] * param_norm / update_norm), one), one)
--                    dp = dp.mul(q)
--
--                param_state = self.state[p]
--                if 'mu' not in param_state:
--                    param_state['mu'] = torch.zeros_like(p)
--                mu = param_state['mu']
--                mu.mul_(g['momentum']).add_(dp)
--
--                p.add_(mu, alpha=-g['lr'])
--
--
--if __name__ == '__main__':
--    try:  
--      main()
--    except KeyboardInterrupt:
--      print('Interrupted')
--      wandb.finish()
--      try:
--          sys.exit(0)
--      except SystemExit:
--          os._exit(0)
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml b/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/config.yaml b/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-deleted file mode 100644
-index 147470d..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-+++ /dev/null
-@@ -1,90 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/barlow.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.0051
--epochs:
--  desc: null
--  value: 2
--lambd:
--  desc: null
--  value: 0.0051
--learning_rate_biases:
--  desc: null
--  value: 0.0048
--learning_rate_weights:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 3
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 100
--projector:
--  desc: null
--  value: 768-768
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-cased
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 20
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/output.log b/wandb/run-20220406_171518-s7zesus8/files/output.log
-deleted file mode 100644
-index 847ffbb..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/output.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--
--barlow.py --load 0
--Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Error in sys.excepthook:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 47, in getlines
--    return updatecache(filename, module_globals)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 136, in updatecache
--    with tokenize.open(fullname) as fp:
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/tokenize.py", line 447, in open
--    buffer = _builtin_open(filename, 'rb')
--KeyboardInterrupt
--Original exception was:
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt b/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-deleted file mode 100644
-index 5f93d29..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,21 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-06T11:45:20.215162",
--    "startedAt": "2022-04-06T11:45:18.613420",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_enhancement/barlow.py",
--    "codePath": "barlow.py",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log b/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-deleted file mode 100644
-index 0630656..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-+++ /dev/null
-@@ -1,91 +0,0 @@
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,622 DEBUG   MainThread:16786 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: check_version
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send():179] send: header
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: check_version
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:19,155 DEBUG   SenderThread:16786 [sender.py:send():179] send: run
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 DEBUG   SenderThread:16786 [sender.py:send():179] send: summary
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:20,211 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: run_start
--2022-04-06 17:15:20,214 DEBUG   HandlerThread:16786 [meta.py:__init__():39] meta init
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:__init__():53] meta init done
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:probe():210] probe
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():89] save code
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():110] save code done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():57] save pip
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():71] save pip done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_conda():78] save conda
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,240 DEBUG   HandlerThread:16786 [meta.py:_save_conda():86] save conda done
--2022-04-06 17:15:22,241 DEBUG   HandlerThread:16786 [meta.py:probe():252] probe done
--2022-04-06 17:15:22,255 DEBUG   SenderThread:16786 [sender.py:send():179] send: files
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-06 17:15:22,262 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: stop_status
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug.log b/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-deleted file mode 100644
-index 9769176..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:init():369] calling init triggers
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 20, 'epochs': 2, 'batch_size': 64, 'learning_rate_weights': 0.2, 'learning_rate_biases': 0.0048, 'weight_decay': 1e-06, 'lambd': 0.0051, 'clip': 1, 'projector': '768-768', 'print_freq': 100, 'dmodel': 768, 'nhead': 3, 'dfeedforward': 256, 'nlayers': 3, 'dropout': 0.0051, 'tokenizer': 'bert-base-multilingual-cased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():418] starting backend
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():132] starting backend process...
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb b/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
-deleted file mode 100644
-index cd7ebea..0000000
-Binary files a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb and /dev/null differ
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py b/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-deleted file mode 100644
-index f15df21..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch b/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-deleted file mode 100644
-index 0ddeae0..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-+++ /dev/null
-@@ -1,226 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2158287 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,87 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..ee4c0ff 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..29be718 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..bda663d 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/output.log b/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-deleted file mode 100644
-index 4d74c7d..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt b/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-deleted file mode 100644
-index 9eb0f02..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:28:48.101605",
--    "startedAt": "2022-04-08T09:28:45.736549",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-deleted file mode 100644
-index 5708b15..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.139744758605957, "_runtime": 22, "_timestamp": 1649410147, "_step": 1, "epoch_loss": 7.139744758605957}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-deleted file mode 100644
-index e57e276..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,745 DEBUG   MainThread:63630 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send():179] send: header
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:46,531 DEBUG   SenderThread:63630 [sender.py:send():179] send: run
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:48,099 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():39] meta init
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():53] meta init done
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:probe():210] probe
--2022-04-08 14:58:48,107 DEBUG   HandlerThread:63630 [meta.py:_setup_git():200] setup git
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_save_code():89] save code
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_code():110] save code done
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_patches():127] save patches
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():57] save pip
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_conda():78] save conda
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:probe():252] probe done
--2022-04-08 14:58:49,727 DEBUG   SenderThread:63630 [sender.py:send():179] send: files
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,737 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:50,547 DEBUG   SenderThread:63630 [sender.py:send():179] send: config
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:05,549 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:05,549 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-deleted file mode 100644
-index a6875c4..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'd3rkwo1k', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml', 'start_method': 'thread'}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:45,738 INFO    MainThread:63630 [wandb_init.py:init():418] starting backend
--2022-04-08 14:58:45,743 INFO    MainThread:63630 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb b/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py b/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml b/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/config.yaml b/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-deleted file mode 100644
-index d5b49b7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 36
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/diff.patch b/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-deleted file mode 100644
-index 5bddede..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-+++ /dev/null
-@@ -1,228 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..f7a973d 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,89 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..151b958 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..80b3468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..abf5aa3 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145917-fjhaj183
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/output.log b/wandb/run-20220408_145917-fjhaj183/files/output.log
-deleted file mode 100644
-index ceeeb4b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt b/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-deleted file mode 100644
-index 705a1e7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:18.659644",
--    "startedAt": "2022-04-08T09:29:17.328450",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=36",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-deleted file mode 100644
-index 1749cae..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140841484069824, "_runtime": 16, "_timestamp": 1649410173, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log b/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-deleted file mode 100644
-index 6a2ea0b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,338 DEBUG   MainThread:63880 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send():179] send: header
--2022-04-08 14:59:17,342 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:17,943 DEBUG   SenderThread:63880 [sender.py:send():179] send: run
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:18,657 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():39] meta init
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:probe():210] probe
--2022-04-08 14:59:18,665 DEBUG   HandlerThread:63880 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_save_code():89] save code
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:probe():252] probe done
--2022-04-08 14:59:20,075 DEBUG   SenderThread:63880 [sender.py:send():179] send: files
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,086 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:20,978 DEBUG   SenderThread:63880 [sender.py:send():179] send: config
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: history
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug.log b/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-deleted file mode 100644
-index 5f71fa1..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjhaj183', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjhaj183.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 36, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:17,335 INFO    MainThread:63880 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb b/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py b/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml b/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml b/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-deleted file mode 100644
-index 39ea9ed..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 16
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch b/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-deleted file mode 100644
-index 3de404c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-+++ /dev/null
-@@ -1,230 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..1036f20 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,91 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..33a9122 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..622b540 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c775116 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/output.log b/wandb/run-20220408_145943-fjlzyv53/files/output.log
-deleted file mode 100644
-index 0a584f7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt b/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-deleted file mode 100644
-index 321b5fe..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:44.714511",
--    "startedAt": "2022-04-08T09:29:43.530748",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=16",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-deleted file mode 100644
-index 43fa534..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.180241584777832, "_runtime": 16, "_timestamp": 1649410199, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-deleted file mode 100644
-index 1bb5ef6..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,540 DEBUG   MainThread:64131 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send():179] send: header
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:43,999 DEBUG   SenderThread:64131 [sender.py:send():179] send: run
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:44,712 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():39] meta init
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:probe():210] probe
--2022-04-08 14:59:44,720 DEBUG   HandlerThread:64131 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:44,739 DEBUG   HandlerThread:64131 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:44,740 DEBUG   HandlerThread:64131 [meta.py:_save_code():89] save code
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:probe():252] probe done
--2022-04-08 14:59:46,122 DEBUG   SenderThread:64131 [sender.py:send():179] send: files
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,133 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,710 DEBUG   SenderThread:64131 [sender.py:send():179] send: config
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: history
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-deleted file mode 100644
-index 042323c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjlzyv53', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 1024, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:43,537 INFO    MainThread:64131 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb b/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py b/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml b/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150006-abict4v2/files/config.yaml b/wandb/run-20220408_150006-abict4v2/files/config.yaml
-deleted file mode 100644
-index 55505a9..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 20
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 8
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150006-abict4v2/files/diff.patch b/wandb/run-20220408_150006-abict4v2/files/diff.patch
-deleted file mode 100644
-index cae01c4..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/diff.patch
-+++ /dev/null
-@@ -1,232 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..a79a795 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,93 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..baa82b6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..79d1f8d 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..4572147 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150006-abict4v2
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/output.log b/wandb/run-20220408_150006-abict4v2/files/output.log
-deleted file mode 100644
-index 18438a2..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/output.log
-+++ /dev/null
-@@ -1,14 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:261: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
-diff --git a/wandb/run-20220408_150006-abict4v2/files/requirements.txt b/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json b/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-deleted file mode 100644
-index f46fef8..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:08.569102",
--    "startedAt": "2022-04-08T09:30:06.988517",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=20",
--        "--nhead=8",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json b/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-deleted file mode 100644
-index 4c47552..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.120020389556885, "_runtime": 21, "_timestamp": 1649410227, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log b/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-deleted file mode 100644
-index eb4114e..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-+++ /dev/null
-@@ -1,71 +0,0 @@
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,998 DEBUG   MainThread:64393 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send():179] send: header
--2022-04-08 15:00:07,002 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:07,447 DEBUG   SenderThread:64393 [sender.py:send():179] send: run
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,565 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:08,566 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:08,568 DEBUG   HandlerThread:64393 [meta.py:__init__():39] meta init
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:probe():210] probe
--2022-04-08 15:00:08,574 DEBUG   HandlerThread:64393 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_save_code():89] save code
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:probe():252] probe done
--2022-04-08 15:00:10,005 DEBUG   SenderThread:64393 [sender.py:send():179] send: files
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:11,189 DEBUG   SenderThread:64393 [sender.py:send():179] send: config
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:26,191 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:26,191 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: history
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug.log b/wandb/run-20220408_150006-abict4v2/logs/debug.log
-deleted file mode 100644
-index 2782e5f..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug.log
-+++ /dev/null
-@@ -1,51 +0,0 @@
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'abict4v2', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-abict4v2.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 20, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 8, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:06,990 INFO    MainThread:64393 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:06,995 INFO    MainThread:64393 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb b/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py b/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml b/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml b/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-deleted file mode 100644
-index ea14f0e..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch b/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-deleted file mode 100644
-index 47b804f..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-+++ /dev/null
-@@ -1,234 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2248477 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,95 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..165ed2c 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..f1325dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..1413293 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/output.log b/wandb/run-20220408_150037-ba0yl54z/files/output.log
-deleted file mode 100644
-index 6742216..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt b/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-deleted file mode 100644
-index 5a492ae..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:38.254663",
--    "startedAt": "2022-04-08T09:30:37.394479",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=64",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-deleted file mode 100644
-index 662ac89..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.082856178283691, "_runtime": 16, "_timestamp": 1649410253, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-deleted file mode 100644
-index 0c041a1..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,404 DEBUG   MainThread:64646 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 DEBUG   SenderThread:64646 [sender.py:send():179] send: header
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,410 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:37,410 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:37,611 DEBUG   SenderThread:64646 [sender.py:send():179] send: run
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:38,252 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():39] meta init
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:probe():210] probe
--2022-04-08 15:00:38,260 DEBUG   HandlerThread:64646 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_save_code():89] save code
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:probe():252] probe done
--2022-04-08 15:00:39,665 DEBUG   SenderThread:64646 [sender.py:send():179] send: files
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,676 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:39,676 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:40,430 DEBUG   SenderThread:64646 [sender.py:send():179] send: config
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: history
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-deleted file mode 100644
-index 4346748..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'ba0yl54z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 64, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 512, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:37,401 INFO    MainThread:64646 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb b/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py b/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml b/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml b/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-deleted file mode 100644
-index 546bdaa..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 16
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch b/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-deleted file mode 100644
-index c98ba4e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-+++ /dev/null
-@@ -1,285 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ea51a40 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,97 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f8e98b2 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..9304e2b 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b02872b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/output.log b/wandb/run-20220408_153004-dg43ixc4/files/output.log
-deleted file mode 100644
-index f49019d..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt b/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-deleted file mode 100644
-index 109e1b6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:05.796412",
--    "startedAt": "2022-04-08T10:00:04.837672",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=512",
--        "--epochs=16",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-deleted file mode 100644
-index 09cdda6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140233993530273, "_runtime": 15, "_timestamp": 1649412019, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-deleted file mode 100644
-index 9669aaf..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,848 DEBUG   MainThread:65348 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,851 DEBUG   SenderThread:65348 [sender.py:send():179] send: header
--2022-04-08 15:30:04,851 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:04,852 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,022 DEBUG   SenderThread:65348 [sender.py:send():179] send: run
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:05,794 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():39] meta init
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:probe():210] probe
--2022-04-08 15:30:05,802 DEBUG   HandlerThread:65348 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:05,821 DEBUG   HandlerThread:65348 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:05,822 DEBUG   HandlerThread:65348 [meta.py:_save_code():89] save code
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:probe():252] probe done
--2022-04-08 15:30:07,221 DEBUG   SenderThread:65348 [sender.py:send():179] send: files
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,232 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:07,233 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,677 DEBUG   SenderThread:65348 [sender.py:send():179] send: config
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: history
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-deleted file mode 100644
-index 66c14b1..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'dg43ixc4', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 16, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:04,845 INFO    MainThread:65348 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb b/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py b/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml b/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml b/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-deleted file mode 100644
-index 122f33a..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch b/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-deleted file mode 100644
-index 797f0a1..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-+++ /dev/null
-@@ -1,287 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..356076f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,99 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7b452fc 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..48b2ecd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..93be230 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/output.log b/wandb/run-20220408_153027-fwwd5rya/files/output.log
-deleted file mode 100644
-index e86aeca..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-17:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt b/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-deleted file mode 100644
-index dcac75d..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:27.794832",
--    "startedAt": "2022-04-08T10:00:27.031889",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=256",
--        "--epochs=40",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-deleted file mode 100644
-index e70a2b8..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-+++ /dev/null
-@@ -1,99 +0,0 @@
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,040 DEBUG   MainThread:65601 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,046 DEBUG   SenderThread:65601 [sender.py:send():179] send: header
--2022-04-08 15:30:27,046 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:27,047 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,254 DEBUG   SenderThread:65601 [sender.py:send():179] send: run
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: summary
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:27,792 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():39] meta init
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:probe():210] probe
--2022-04-08 15:30:27,800 DEBUG   HandlerThread:65601 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:27,819 DEBUG   HandlerThread:65601 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:27,820 DEBUG   HandlerThread:65601 [meta.py:_save_code():89] save code
--2022-04-08 15:30:27,828 DEBUG   HandlerThread:65601 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:27,829 DEBUG   HandlerThread:65601 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:probe():252] probe done
--2022-04-08 15:30:29,202 DEBUG   SenderThread:65601 [sender.py:send():179] send: files
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:29,214 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: config
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-deleted file mode 100644
-index 987c5d6..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-+++ /dev/null
-@@ -1,84 +0,0 @@
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fwwd5rya', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 256, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:27,038 INFO    MainThread:65601 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:27,039 INFO    MainThread:65601 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb b/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
-deleted file mode 100644
-index bfb12ff..0000000
-Binary files a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py b/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml b/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml b/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch b/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-deleted file mode 100644
-index bd71761..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-+++ /dev/null
-@@ -1,377 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..d3a775c 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,100 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..74ec524 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..c957937 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..287708f 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/output.log b/wandb/run-20220409_152616-3a3gw94y/files/output.log
-deleted file mode 100644
-index 13e9c3e..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt b/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-deleted file mode 100644
-index 20f0482..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:56:17.429229",
--    "startedAt": "2022-04-09T09:56:16.815816",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-deleted file mode 100644
-index 5602f92..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 16, "_timestamp": 1649498192, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-deleted file mode 100644
-index 2546fd3..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,824 DEBUG   MainThread:3266 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,828 DEBUG   SenderThread:3266 [sender.py:send():179] send: header
--2022-04-09 15:26:16,829 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:16,984 DEBUG   SenderThread:3266 [sender.py:send():179] send: run
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:17,426 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():39] meta init
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():53] meta init done
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:probe():210] probe
--2022-04-09 15:26:17,435 DEBUG   HandlerThread:3266 [meta.py:_setup_git():200] setup git
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_save_code():89] save code
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_code():110] save code done
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_patches():127] save patches
--2022-04-09 15:26:17,564 DEBUG   HandlerThread:3266 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:26:17,565 DEBUG   HandlerThread:3266 [meta.py:_save_pip():57] save pip
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_conda():78] save conda
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:probe():252] probe done
--2022-04-09 15:26:19,491 DEBUG   SenderThread:3266 [sender.py:send():179] send: files
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:26:19,497 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:19,831 DEBUG   SenderThread:3266 [sender.py:send():179] send: config
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: history
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-deleted file mode 100644
-index ebbf034..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():418] starting backend
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb b/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py b/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml b/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml b/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch b/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-deleted file mode 100644
-index c3ed101..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-+++ /dev/null
-@@ -1,379 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ed88fe4 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,102 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..4895794 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..1f9d48c 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..dfe2dcb 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/output.log b/wandb/run-20220409_152708-15jgzcwp/files/output.log
-deleted file mode 100644
-index 9a9a49f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt b/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-deleted file mode 100644
-index abaad7d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:57:09.613679",
--    "startedAt": "2022-04-09T09:57:08.966939",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-deleted file mode 100644
-index 0164a0d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 12, "_timestamp": 1649498241, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-deleted file mode 100644
-index de7918e..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,002 DEBUG   MainThread:3540 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,017 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send():179] send: header
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,109 DEBUG   SenderThread:3540 [sender.py:send():179] send: run
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:09,611 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():39] meta init
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():53] meta init done
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:probe():210] probe
--2022-04-09 15:27:09,619 DEBUG   HandlerThread:3540 [meta.py:_setup_git():200] setup git
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_save_code():89] save code
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_code():110] save code done
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_patches():127] save patches
--2022-04-09 15:27:09,693 DEBUG   HandlerThread:3540 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():57] save pip
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_conda():78] save conda
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,002 DEBUG   HandlerThread:3540 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:27:11,003 DEBUG   HandlerThread:3540 [meta.py:probe():252] probe done
--2022-04-09 15:27:11,004 DEBUG   SenderThread:3540 [sender.py:send():179] send: files
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,362 DEBUG   SenderThread:3540 [sender.py:send():179] send: config
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: history
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-deleted file mode 100644
-index 023162f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:27:08,971 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:08,974 INFO    MainThread:3540 [wandb_init.py:init():418] starting backend
--2022-04-09 15:27:08,994 INFO    MainThread:3540 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:27:08,996 INFO    MainThread:3540 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb b/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py b/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-deleted file mode 100644
-index 596bd8d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch b/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-deleted file mode 100644
-index edba74d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-+++ /dev/null
-@@ -1,457 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..6f7f3e6 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,180 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..596bd8d 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7064436 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..3ee4416 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..425ec98 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/output.log b/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-deleted file mode 100644
-index e872735..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt b/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-deleted file mode 100644
-index 39bdbe7..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:31:16.739157",
--    "startedAt": "2022-04-09T10:31:15.626079",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-deleted file mode 100644
-index 96a4906..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 14, "_timestamp": 1649500289, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-deleted file mode 100644
-index 2dc7db1..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,660 DEBUG   MainThread:6109 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 DEBUG   SenderThread:6109 [sender.py:send():179] send: header
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,673 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:01:15,673 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:15,970 DEBUG   SenderThread:6109 [sender.py:send():179] send: run
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:16,736 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():39] meta init
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():53] meta init done
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:probe():210] probe
--2022-04-09 16:01:16,745 DEBUG   HandlerThread:6109 [meta.py:_setup_git():200] setup git
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_save_code():89] save code
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_code():110] save code done
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_patches():127] save patches
--2022-04-09 16:01:16,811 DEBUG   HandlerThread:6109 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():57] save pip
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_conda():78] save conda
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:probe():252] probe done
--2022-04-09 16:01:18,150 DEBUG   SenderThread:6109 [sender.py:send():179] send: files
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,158 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:01:18,158 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,709 DEBUG   SenderThread:6109 [sender.py:send():179] send: config
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: history
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-deleted file mode 100644
-index 87f5666..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--2022-04-09 16:01:15,633 INFO    MainThread:6109 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():418] starting backend
--2022-04-09 16:01:15,655 INFO    MainThread:6109 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:01:15,656 INFO    MainThread:6109 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb b/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py b/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-deleted file mode 100644
-index feaf1fc..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch b/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-deleted file mode 100644
-index eec0ab3..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-+++ /dev/null
-@@ -1,459 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..8b42533 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,182 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..feaf1fc 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..e712296 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b2fc627 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..337b531 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/output.log b/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-deleted file mode 100644
-index e15e9a4..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-+++ /dev/null
-@@ -1,17 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt b/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-deleted file mode 100644
-index f4efc7b..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:32:47.190940",
--    "startedAt": "2022-04-09T10:32:46.030719",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-deleted file mode 100644
-index 59ceedf..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 18, "_timestamp": 1649500384, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-deleted file mode 100644
-index 4dae842..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,040 DEBUG   MainThread:6410 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send():179] send: header
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:46,151 DEBUG   SenderThread:6410 [sender.py:send():179] send: run
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:47,188 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():39] meta init
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():53] meta init done
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:probe():210] probe
--2022-04-09 16:02:47,197 DEBUG   HandlerThread:6410 [meta.py:_setup_git():200] setup git
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_save_code():89] save code
--2022-04-09 16:02:47,224 DEBUG   HandlerThread:6410 [meta.py:_save_code():110] save code done
--2022-04-09 16:02:47,225 DEBUG   HandlerThread:6410 [meta.py:_save_patches():127] save patches
--2022-04-09 16:02:47,270 DEBUG   HandlerThread:6410 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():57] save pip
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_conda():78] save conda
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:probe():252] probe done
--2022-04-09 16:02:48,639 DEBUG   SenderThread:6410 [sender.py:send():179] send: files
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,649 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:49,267 DEBUG   SenderThread:6410 [sender.py:send():179] send: config
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,268 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:03:04,269 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:03:04,791 DEBUG   SenderThread:6410 [sender.py:send():179] send: history
--2022-04-09 16:03:04,792 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-deleted file mode 100644
-index c4edd31..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():418] starting backend
--2022-04-09 16:02:46,037 INFO    MainThread:6410 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb b/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py b/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-deleted file mode 100644
-index 182fd97..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-+++ /dev/null
-@@ -1,378 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch b/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-deleted file mode 100644
-index 2c51f6a..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-+++ /dev/null
-@@ -1,470 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..507a499 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,192 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..182fd97 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,98 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..2224b92 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..94d02b9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f7361e5 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/output.log b/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-deleted file mode 100644
-index 35bceac..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-+++ /dev/null
-@@ -1,18 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt b/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-deleted file mode 100644
-index 440569b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:34:10.122598",
--    "startedAt": "2022-04-09T10:34:09.149412",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-deleted file mode 100644
-index 52da06b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 27, "_timestamp": 1649500476, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-deleted file mode 100644
-index bf89eff..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,159 DEBUG   MainThread:6703 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send():179] send: header
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:09,250 DEBUG   SenderThread:6703 [sender.py:send():179] send: run
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:10,119 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():39] meta init
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():53] meta init done
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:probe():210] probe
--2022-04-09 16:04:10,130 DEBUG   HandlerThread:6703 [meta.py:_setup_git():200] setup git
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_save_code():89] save code
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_code():110] save code done
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_patches():127] save patches
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_pip():57] save pip
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_conda():78] save conda
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:probe():252] probe done
--2022-04-09 16:04:11,658 DEBUG   SenderThread:6703 [sender.py:send():179] send: files
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,667 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:11,669 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:12,396 DEBUG   SenderThread:6703 [sender.py:send():179] send: config
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:27,397 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:27,397 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: history
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:39,168 DEBUG   SenderThread:6703 [sender.py:send():179] send: stats
--2022-04-09 16:04:44,241 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:44,241 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:59,736 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:59,737 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-deleted file mode 100644
-index 0fbab81..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-+++ /dev/null
-@@ -1,54 +0,0 @@
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():418] starting backend
--2022-04-09 16:04:09,156 INFO    MainThread:6703 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:04:09,157 INFO    MainThread:6703 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb b/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
-deleted file mode 100644
-index 81c67b9..0000000
-Binary files a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py b/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml b/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/config.yaml b/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-deleted file mode 100644
-index 1ebd7db..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/diff.patch b/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-deleted file mode 100644
-index 9c4e2ae..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-+++ /dev/null
-@@ -1,482 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2d0dffc 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,202 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..18dd535 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b8703a2 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7af087b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160908-2097uoqw
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/output.log b/wandb/run-20220409_160908-2097uoqw/files/output.log
-deleted file mode 100644
-index ed7c7b5..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt b/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-deleted file mode 100644
-index 3cf53b0..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:39:09.049034",
--    "startedAt": "2022-04-09T10:39:08.174640",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-deleted file mode 100644
-index 225791e..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5264.9873046875, "_runtime": 162, "_timestamp": 1649500910, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log b/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-deleted file mode 100644
-index 1baf812..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-+++ /dev/null
-@@ -1,1238 +0,0 @@
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,183 DEBUG   MainThread:7244 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 DEBUG   SenderThread:7244 [sender.py:send():179] send: header
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,187 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:09:08,187 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:08,556 DEBUG   SenderThread:7244 [sender.py:send():179] send: run
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:09,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():39] meta init
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():53] meta init done
--2022-04-09 16:09:09,049 DEBUG   HandlerThread:7244 [meta.py:probe():210] probe
--2022-04-09 16:09:09,055 DEBUG   HandlerThread:7244 [meta.py:_setup_git():200] setup git
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_save_code():89] save code
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_code():110] save code done
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_patches():127] save patches
--2022-04-09 16:09:09,148 DEBUG   HandlerThread:7244 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:09:09,149 DEBUG   HandlerThread:7244 [meta.py:_save_pip():57] save pip
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_conda():78] save conda
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:probe():252] probe done
--2022-04-09 16:09:10,559 DEBUG   SenderThread:7244 [sender.py:send():179] send: files
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,033 DEBUG   SenderThread:7244 [sender.py:send():179] send: config
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:26,037 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:26,037 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:37,780 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:41,491 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:41,492 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:08,466 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:10:12,367 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:12,368 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:27,818 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:27,818 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:43,478 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:43,478 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,373 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:05,374 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:08,654 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:14,750 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:14,750 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:32,169 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:32,169 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:39,457 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:48,462 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:48,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:03,967 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:12:03,968 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
--2022-04-09 16:12:05,938 INFO    MainThread:7244 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 16:12:05,939 INFO    MainThread:7244 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:12:06,150 DEBUG   SenderThread:7244 [sender.py:send():179] send: telemetry
--2022-04-09 16:12:06,151 DEBUG   SenderThread:7244 [sender.py:send():179] send: exit
--2022-04-09 16:12:06,151 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():295] send defer
--2022-04-09 16:12:06,153 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:06,155 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,155 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 16:12:06,155 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:06,156 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 16:12:06,158 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,158 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 16:12:06,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:12:06,227 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,227 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 16:12:06,228 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,228 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 16:12:06,229 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,229 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 16:12:06,229 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,229 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 16:12:06,259 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,450 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:06,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:07,230 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 16:12:07,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,231 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,231 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 16:12:07,231 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:07,232 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:12:07,333 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:07,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:07,453 INFO    SenderThread:7244 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt requirements.txt
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:12:07,455 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log output.log
--2022-04-09 16:12:07,456 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:12:07,457 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:12:07,467 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml config.yaml
--2022-04-09 16:12:07,468 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch diff.patch
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 16:12:07,508 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,510 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,510 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 16:12:07,510 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:07,511 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 16:12:07,512 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,512 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 16:12:07,512 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,513 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 16:12:07,612 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,484 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 16:12:08,485 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,486 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,486 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 16:12:08,487 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 16:12:08,487 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41552
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,489 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,489 DEBUG   SenderThread:7244 [sender.py:send():179] send: final
--2022-04-09 16:12:08,490 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send():179] send: footer
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,490 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 16:12:08,591 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,591 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,593 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,695 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,695 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,696 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,798 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,798 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,799 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,848 INFO    Thread-33 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:08,900 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,901 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,902 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,004 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,005 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,006 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,108 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,109 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,110 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,212 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,213 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,214 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,316 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,317 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,318 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,420 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,421 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,422 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,524 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,525 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,526 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,628 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,629 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,630 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,732 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,733 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,734 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,837 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,838 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,840 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,875 INFO    Thread-32 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:09,942 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,942 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,944 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,046 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,047 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,149 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,150 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,151 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,253 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,254 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,255 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,304 INFO    Thread-29 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:12:10,357 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,358 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,359 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,461 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,463 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,772 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,772 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,772 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,874 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,874 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,876 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,978 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,979 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,980 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,082 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,082 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,084 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,186 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,186 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,188 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,290 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,290 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,292 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,314 INFO    Thread-30 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:11,394 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,394 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,396 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,498 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,499 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,500 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,602 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,603 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,604 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,706 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,707 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,708 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,810 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,810 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,812 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,914 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,915 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,916 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,018 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,019 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,020 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,122 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,122 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,124 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,226 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,228 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,330 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,330 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,332 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,434 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,435 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,436 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,538 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,538 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,540 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,642 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,642 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,644 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,746 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,746 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,747 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,850 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,850 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,852 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,954 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,954 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,955 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,057 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,058 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,059 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,161 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,162 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,163 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,265 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,266 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,267 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,369 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,370 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,371 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,473 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,473 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,475 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,577 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,577 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,578 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,680 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,681 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,682 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,784 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,785 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,786 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,888 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,889 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,890 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,992 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,993 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,994 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,096 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,097 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,098 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,200 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,201 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,202 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,304 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,305 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,307 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,409 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,410 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,411 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,513 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,514 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,515 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,617 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,618 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,619 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,721 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,721 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,723 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,826 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,827 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,829 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,931 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,931 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,933 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,034 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,035 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,037 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,138 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,139 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,141 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,244 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,244 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,245 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,348 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,348 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,350 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,453 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,454 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,461 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,773 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,773 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,775 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,877 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,877 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,879 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,981 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,982 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,983 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,085 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,086 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,087 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,189 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,190 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,191 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,293 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,294 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,295 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,397 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,398 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,399 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,501 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,502 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,503 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,605 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,606 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,607 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,709 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,710 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,711 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,813 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,814 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,816 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,918 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,919 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,920 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,022 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,023 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,024 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,126 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,127 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,128 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,230 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,232 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,334 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,335 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,336 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,374 INFO    Thread-31 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:12:17,438 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,438 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,440 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,542 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,543 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,544 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,646 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,647 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,647 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:17,648 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,650 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 16:12:17,653 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 16:12:17,656 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 16:12:17,656 INFO    HandlerThread:7244 [handler.py:finish():638] shutting down handler
--2022-04-09 16:12:18,493 INFO    WriterThread:7244 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:12:18,647 INFO    SenderThread:7244 [sender.py:finish():933] shutting down sender
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:18,661 INFO    MainThread:7244 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 16:12:18,662 INFO    MainThread:7244 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 16:12:18,663 INFO    MainThread:7244 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 16:12:18,709 INFO    MainThread:7244 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug.log b/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-deleted file mode 100644
-index ad8f755..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-+++ /dev/null
-@@ -1,77 +0,0 @@
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug.log
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():418] starting backend
--2022-04-09 16:09:08,180 INFO    MainThread:7244 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
-diff --git a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb b/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
-deleted file mode 100644
-index b5995f1..0000000
-Binary files a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py b/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml b/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/config.yaml b/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/diff.patch b/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-deleted file mode 100644
-index aa6c773..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-+++ /dev/null
-@@ -1,528 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2aaecf9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,248 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..91bb884 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..252e468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c99b343 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_161421-3t82t88x
--\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/output.log b/wandb/run-20220409_161421-3t82t88x/files/output.log
-deleted file mode 100644
-index 3bf650b..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/output.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt b/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-deleted file mode 100644
-index f9df6f1..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:44:23.094487",
--    "startedAt": "2022-04-09T10:44:21.821617",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log b/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-deleted file mode 100644
-index 3f70132..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,831 DEBUG   MainThread:8815 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send():179] send: header
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:21,939 DEBUG   SenderThread:8815 [sender.py:send():179] send: run
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,090 DEBUG   SenderThread:8815 [sender.py:send():179] send: summary
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:23,092 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():39] meta init
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():53] meta init done
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:probe():210] probe
--2022-04-09 16:14:23,100 DEBUG   HandlerThread:8815 [meta.py:_setup_git():200] setup git
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_save_code():89] save code
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_code():110] save code done
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_patches():127] save patches
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_pip():57] save pip
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_conda():78] save conda
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,537 DEBUG   HandlerThread:8815 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:14:24,538 DEBUG   HandlerThread:8815 [meta.py:probe():252] probe done
--2022-04-09 16:14:24,539 DEBUG   SenderThread:8815 [sender.py:send():179] send: files
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,548 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:24,548 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:25,577 DEBUG   SenderThread:8815 [sender.py:send():179] send: config
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:40,579 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:40,579 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:51,743 DEBUG   SenderThread:8815 [sender.py:send():179] send: stats
--2022-04-09 16:14:56,424 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:56,424 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:15:01,820 DEBUG   SenderThread:8815 [sender.py:send():179] send: history
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug.log b/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-deleted file mode 100644
-index 99b6b97..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug.log
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():418] starting backend
--2022-04-09 16:14:21,828 INFO    MainThread:8815 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb b/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
-deleted file mode 100644
-index a4486ce..0000000
-Binary files a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py b/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml b/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/config.yaml b/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/diff.patch b/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-deleted file mode 100644
-index 9eddab1..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-+++ /dev/null
-@@ -1,560 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..353da1f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,249 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f0332eb 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..97853e9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7be71e2 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_162621-m83puhmm
--\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/output.log b/wandb/run-20220409_162621-m83puhmm/files/output.log
-deleted file mode 100644
-index ee1c9e3..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/output.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt b/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-deleted file mode 100644
-index 4ce8f76..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:56:22.902051",
--    "startedAt": "2022-04-09T10:56:21.924771",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log b/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-deleted file mode 100644
-index 7032449..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,934 DEBUG   MainThread:9280 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:26:21,937 DEBUG   SenderThread:9280 [sender.py:send():179] send: header
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:21,938 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,344 DEBUG   SenderThread:9280 [sender.py:send():179] send: run
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,889 DEBUG   SenderThread:9280 [sender.py:send():179] send: summary
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:22,895 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():39] meta init
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():53] meta init done
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:probe():210] probe
--2022-04-09 16:26:22,908 DEBUG   HandlerThread:9280 [meta.py:_setup_git():200] setup git
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_save_code():89] save code
--2022-04-09 16:26:22,972 DEBUG   HandlerThread:9280 [meta.py:_save_code():110] save code done
--2022-04-09 16:26:22,973 DEBUG   HandlerThread:9280 [meta.py:_save_patches():127] save patches
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():57] save pip
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_conda():78] save conda
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:probe():252] probe done
--2022-04-09 16:26:24,440 DEBUG   SenderThread:9280 [sender.py:send():179] send: files
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:24,448 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:24,898 DEBUG   SenderThread:9280 [sender.py:send():179] send: config
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:39,905 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:39,905 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:51,624 DEBUG   SenderThread:9280 [sender.py:send():179] send: stats
--2022-04-09 16:26:55,340 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:55,340 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:27:06,912 DEBUG   SenderThread:9280 [sender.py:send():179] send: history
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug.log b/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-deleted file mode 100644
-index 5053427..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():418] starting backend
--2022-04-09 16:26:21,931 INFO    MainThread:9280 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb b/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
-deleted file mode 100644
-index 978cbe5..0000000
-Binary files a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py b/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-deleted file mode 100644
-index 1988ff1..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 1
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 1
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch b/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-deleted file mode 100644
-index d503875..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-+++ /dev/null
-@@ -1,561 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..b0966e9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,250 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..1486dd6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..071678f 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..be8b91a 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf
--\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/output.log b/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-deleted file mode 100644
-index f4f17d5..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt b/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-deleted file mode 100644
-index 6c00633..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:09:01.944494",
--    "startedAt": "2022-04-09T12:09:01.199712",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-deleted file mode 100644
-index c0804b4..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5045.823547363281, "_runtime": 154, "_timestamp": 1649506295, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-deleted file mode 100644
-index 67f5897..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-+++ /dev/null
-@@ -1,418 +0,0 @@
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,208 DEBUG   MainThread:10760 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send():179] send: header
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,337 DEBUG   SenderThread:10760 [sender.py:send():179] send: run
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:01,942 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():39] meta init
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():53] meta init done
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:probe():210] probe
--2022-04-09 17:39:01,950 DEBUG   HandlerThread:10760 [meta.py:_setup_git():200] setup git
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_save_code():89] save code
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_code():110] save code done
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_patches():127] save patches
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():57] save pip
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_conda():78] save conda
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:probe():252] probe done
--2022-04-09 17:39:03,362 DEBUG   SenderThread:10760 [sender.py:send():179] send: files
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,372 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:03,372 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,822 DEBUG   SenderThread:10760 [sender.py:send():179] send: config
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:18,825 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:18,826 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:30,755 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:39:34,298 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:34,298 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:01,384 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:05,203 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:05,204 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,724 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:20,725 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,136 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:27,137 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:32,273 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:36,248 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:36,249 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:51,681 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:51,682 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:02,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,142 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:07,142 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:22,870 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:22,871 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:33,728 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,321 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:38,322 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: telemetry
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: exit
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():295] send defer
--2022-04-09 17:41:51,004 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,005 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,006 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,006 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 17:41:51,007 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,008 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,008 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 17:41:51,009 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 17:41:51,009 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,010 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 17:41:51,062 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,062 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:51,063 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,063 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 17:41:51,064 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,064 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 17:41:51,064 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 17:41:51,065 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,065 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 17:41:51,109 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,203 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:51,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:51,546 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 17:41:51,546 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,546 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,546 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,546 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 17:41:51,547 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 17:41:51,648 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt requirements.txt
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log output.log
--2022-04-09 17:41:52,208 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 17:41:52,209 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json wandb-summary.json
--2022-04-09 17:41:52,218 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml config.yaml
--2022-04-09 17:41:52,220 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch diff.patch
--2022-04-09 17:41:52,222 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py code/train_translation.py
--2022-04-09 17:41:52,224 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 17:41:52,224 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 17:41:52,225 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 17:41:52,225 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,226 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,226 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 17:41:52,328 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,842 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 17:41:52,842 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,844 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,844 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 17:41:52,845 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,846 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 17:41:52,848 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,848 DEBUG   SenderThread:10760 [sender.py:send():179] send: final
--2022-04-09 17:41:52,849 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 17:41:52,849 DEBUG   SenderThread:10760 [sender.py:send():179] send: footer
--2022-04-09 17:41:52,850 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,850 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 17:41:52,947 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,947 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,948 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,049 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,050 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,051 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 45730
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,153 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,153 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,155 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,256 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,257 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,258 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,360 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,361 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,362 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,464 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,465 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,466 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,502 INFO    Thread-33 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:53,504 INFO    Thread-29 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:41:53,512 INFO    Thread-32 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:53,524 INFO    Thread-31 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:41:53,568 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,568 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,569 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,671 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,672 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,673 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,775 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,776 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,777 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,879 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,879 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,881 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,983 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,983 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,984 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,033 INFO    Thread-30 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:54,086 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,087 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,088 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,190 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,190 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,192 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,294 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,294 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,294 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:54,295 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,297 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 17:41:54,299 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 17:41:54,302 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 17:41:54,302 INFO    HandlerThread:10760 [handler.py:finish():638] shutting down handler
--2022-04-09 17:41:54,849 INFO    WriterThread:10760 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [sender.py:finish():933] shutting down sender
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:55,308 INFO    MainThread:10760 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 17:41:55,309 INFO    MainThread:10760 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 17:41:55,310 INFO    MainThread:10760 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 17:41:55,323 INFO    MainThread:10760 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-deleted file mode 100644
-index 2ea4289..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-+++ /dev/null
-@@ -1,73 +0,0 @@
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():418] starting backend
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb b/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
-deleted file mode 100644
-index c939775..0000000
-Binary files a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py b/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml b/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/config.yaml b/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-deleted file mode 100644
-index 0b2ef04..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 24
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/diff.patch b/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-deleted file mode 100644
-index a6f8b6d..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-+++ /dev/null
-@@ -1,634 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e11eb21 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,302 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..a3e7597 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..453b7bc 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b2d6ded 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_175151-z44hpswp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/output.log b/wandb/run-20220409_175151-z44hpswp/files/output.log
-deleted file mode 100644
-index 2224687..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/output.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--translation model saved in checkpoint
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt b/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-deleted file mode 100644
-index e3bc5e0..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:21:52.829321",
--    "startedAt": "2022-04-09T12:21:51.786614",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=24",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-deleted file mode 100644
-index 4d8b4c3..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 107.22583770751953, "_runtime": 695, "_timestamp": 1649507606, "_step": 28, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log b/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-deleted file mode 100644
-index 552d2f2..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,620 +0,0 @@
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,796 DEBUG   MainThread:14720 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send():179] send: header
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,171 DEBUG   SenderThread:14720 [sender.py:send():179] send: run
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,825 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:52,827 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():39] meta init
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():53] meta init done
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:probe():210] probe
--2022-04-09 17:51:52,837 DEBUG   HandlerThread:14720 [meta.py:_setup_git():200] setup git
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_save_code():89] save code
--2022-04-09 17:51:52,876 DEBUG   HandlerThread:14720 [meta.py:_save_code():110] save code done
--2022-04-09 17:51:52,877 DEBUG   HandlerThread:14720 [meta.py:_save_patches():127] save patches
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():57] save pip
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_conda():78] save conda
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:probe():252] probe done
--2022-04-09 17:51:54,261 DEBUG   SenderThread:14720 [sender.py:send():179] send: files
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,272 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:51:54,272 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: config
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:09,721 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:09,721 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:21,569 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:25,148 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:25,149 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:52,213 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,140 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:56,140 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:11,596 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:11,597 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:23,054 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:27,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:27,074 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:42,499 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:42,500 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:53,596 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:57,929 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:57,929 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:59,413 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:59,414 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:13,359 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:13,359 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,344 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:54:20,345 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:24,527 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:28,793 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:28,793 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:44,227 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:44,227 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:55,062 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:59,653 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:59,653 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:11,338 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:11,339 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:15,098 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:15,099 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:25,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:30,519 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:30,519 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:45,955 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:45,956 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:56,468 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:01,589 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:17,078 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:17,078 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:27,343 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:56:32,522 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:32,522 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:47,961 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:47,961 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:57,925 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:03,390 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:03,390 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:18,853 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:18,853 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:28,552 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:34,280 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:34,280 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:49,734 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:49,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:59,325 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,341 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:05,342 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:20,790 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:20,790 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:29,955 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:36,214 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:36,214 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:51,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:51,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:00,845 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:07,147 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:07,147 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:22,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:22,588 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:31,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:38,008 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:38,008 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:53,449 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:53,450 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:02,140 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:08,884 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:08,884 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:13,617 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:13,618 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:24,366 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:24,367 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:32,786 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:39,806 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:39,806 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,224 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:55,225 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,715 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:00,716 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:03,610 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:10,649 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:10,649 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:26,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:26,073 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:34,217 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:41,491 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:41,492 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,993 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:43,994 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:56,918 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:56,918 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:04,763 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:12,340 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:12,340 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:35,408 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:43,201 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:43,201 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:44,434 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:44,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:58,647 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:58,647 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:06,291 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:14,117 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:14,117 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,051 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:26,052 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:29,557 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:29,559 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:36,939 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:03:42,324 INFO    MainThread:14720 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:03:43,079 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:43,080 DEBUG   SenderThread:14720 [sender.py:send():179] send: telemetry
--2022-04-09 18:03:43,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:43,580 DEBUG   SenderThread:14720 [sender.py:send():179] send: exit
--2022-04-09 18:03:43,580 INFO    SenderThread:14720 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:03:43,581 INFO    SenderThread:14720 [sender.py:send_exit():295] send defer
--2022-04-09 18:03:43,581 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:43,582 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,583 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:03:43,583 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:03:43,584 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 48639
--}
--
--2022-04-09 18:03:43,585 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,586 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:03:43,657 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,657 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:43,658 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,658 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:03:43,660 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,660 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:03:43,686 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:44,248 INFO    SenderThread:14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt requirements.txt
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log output.log
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml config.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch diff.patch
--2022-04-09 18:03:44,251 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:03:44,253 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:03:44,253 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,254 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,258 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:03:44,260 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,260 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:03:44,261 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,261 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:03:44,261 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,261 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:03:44,361 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,907 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:03:44,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,908 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,908 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:03:44,909 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,909 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:03:44,910 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,910 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: final
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: footer
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,911 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:03:45,010 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,011 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,012 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,115 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,116 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,117 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,219 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,219 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,221 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,323 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,323 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,325 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,427 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,427 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,428 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,466 INFO    Thread-54 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 18:03:45,472 INFO    Thread-52 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 18:03:45,476 INFO    Thread-53 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:45,530 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,531 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,532 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,636 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,738 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,739 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,740 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,842 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,842 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,844 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,946 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,946 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,948 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,050 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,051 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,053 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,155 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,156 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,157 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,184 INFO    Thread-56 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:46,188 INFO    Thread-55 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:46,259 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,259 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,261 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,363 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,364 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,365 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,468 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,469 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,469 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:46,470 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,472 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:03:46,474 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:03:46,477 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:03:46,478 INFO    HandlerThread:14720 [handler.py:finish():638] shutting down handler
--2022-04-09 18:03:46,911 INFO    WriterThread:14720 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 18:03:47,469 INFO    SenderThread:14720 [sender.py:finish():933] shutting down sender
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:47,483 INFO    MainThread:14720 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:03:47,484 INFO    MainThread:14720 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:03:47,485 INFO    MainThread:14720 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:03:47,525 INFO    MainThread:14720 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug.log b/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-deleted file mode 100644
-index bb769fe..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-+++ /dev/null
-@@ -1,140 +0,0 @@
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'z44hpswp', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-z44hpswp.yaml', 'start_method': 'thread'}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug.log
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 24, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():418] starting backend
--2022-04-09 17:51:51,793 INFO    MainThread:14720 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
-diff --git a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb b/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
-deleted file mode 100644
-index 55f1aff..0000000
-Binary files a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py b/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml b/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml b/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-deleted file mode 100644
-index 194d831..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch b/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-deleted file mode 100644
-index 979dcc5..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-+++ /dev/null
-@@ -1,645 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..42fbde8 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,313 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..371ace5 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..a6d9884 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..705068b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z
--\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/output.log b/wandb/run-20220409_180353-vjrenr4z/files/output.log
-deleted file mode 100644
-index a2bf91c..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/output.log
-+++ /dev/null
-@@ -1,102 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--translation model saved in checkpoint
--{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--translation model saved in checkpoint
--{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--translation model saved in checkpoint
--{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--translation model saved in checkpoint
--{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--translation model saved in checkpoint
--{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--translation model saved in checkpoint
--{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--translation model saved in checkpoint
--{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--translation model saved in checkpoint
--{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--translation model saved in checkpoint
--{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--translation model saved in checkpoint
--{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--translation model saved in checkpoint
--{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--translation model saved in checkpoint
--{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--translation model saved in checkpoint
--{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--translation model saved in checkpoint
--{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--translation model saved in checkpoint
--{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--translation model saved in checkpoint
--{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--translation model saved in checkpoint
--{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--translation model saved in checkpoint
--{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--translation model saved in checkpoint
--{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--translation model saved in checkpoint
--{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--translation model saved in checkpoint
--{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--translation model saved in checkpoint
--{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--translation model saved in checkpoint
--{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--translation model saved in checkpoint
--{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--translation model saved in checkpoint
--{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--translation model saved in checkpoint
--{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--translation model saved in checkpoint
--{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--translation model saved in checkpoint
--{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--translation model saved in checkpoint
--{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--translation model saved in checkpoint
--{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--translation model saved in checkpoint
--{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--translation model saved in checkpoint
--{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--translation model saved in checkpoint
--{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--translation model saved in checkpoint
--{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--translation model saved in checkpoint
--{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt b/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-deleted file mode 100644
-index 3e24107..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:33:55.138080",
--    "startedAt": "2022-04-09T12:33:53.912960",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=40",
--        "--nhead=4",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-deleted file mode 100644
-index dbd5bb9..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 571.8498382568359, "_runtime": 1394, "_timestamp": 1649509027, "_step": 47, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-deleted file mode 100644
-index 6ac5722..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,809 +0,0 @@
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,947 DEBUG   MainThread:18842 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 DEBUG   SenderThread:18842 [sender.py:send():179] send: header
--2022-04-09 18:03:53,957 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:53,958 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:54,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: run
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:55,130 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():39] meta init
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():53] meta init done
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:probe():210] probe
--2022-04-09 18:03:55,146 DEBUG   HandlerThread:18842 [meta.py:_setup_git():200] setup git
--2022-04-09 18:03:55,213 DEBUG   HandlerThread:18842 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:03:55,214 DEBUG   HandlerThread:18842 [meta.py:_save_code():89] save code
--2022-04-09 18:03:55,241 DEBUG   HandlerThread:18842 [meta.py:_save_code():110] save code done
--2022-04-09 18:03:55,242 DEBUG   HandlerThread:18842 [meta.py:_save_patches():127] save patches
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():57] save pip
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_conda():78] save conda
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,710 DEBUG   HandlerThread:18842 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:03:56,711 DEBUG   HandlerThread:18842 [meta.py:probe():252] probe done
--2022-04-09 18:03:56,713 DEBUG   SenderThread:18842 [sender.py:send():179] send: files
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,723 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:56,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: config
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:12,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:12,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:23,959 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:27,637 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:27,637 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:43,070 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:43,071 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:54,578 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:58,609 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:58,609 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,096 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:14,096 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:25,318 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:05:29,536 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:29,536 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,041 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:45,042 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:55,878 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:00,385 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:00,385 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,115 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:12,116 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:15,812 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:15,812 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:26,509 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:31,252 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:31,252 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:46,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:46,699 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:57,088 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:02,128 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:02,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:17,560 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:17,560 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:27,788 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:33,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:33,039 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:58,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:03,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:03,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:10,495 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:10,496 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,773 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:16,774 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:19,358 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:19,358 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:29,127 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:34,827 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:34,827 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:50,258 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:50,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:59,791 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:05,625 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:05,625 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:21,079 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:21,079 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:30,544 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:36,425 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:36,426 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,629 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:37,630 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:51,758 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:51,758 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:01,192 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:07,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:22,576 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:22,576 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,752 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:37,928 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:37,928 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:02,406 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:08,610 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:08,610 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:23,966 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:23,966 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:33,001 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:39,600 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:39,600 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:54,944 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:54,944 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:03,627 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:10,280 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:10,280 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:25,635 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:25,635 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:34,297 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:40,989 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:40,989 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:56,322 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:56,323 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:05,226 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:11,687 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:11,687 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:27,035 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:27,035 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:35,749 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:42,474 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:42,475 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:57,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:06,507 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:13,240 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:13,240 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,985 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:26,986 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:28,667 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:28,668 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:37,148 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:44,310 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:44,310 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:59,666 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:59,666 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:07,695 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:14,998 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:14,998 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:30,334 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:30,334 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:38,429 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:45,673 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:45,673 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:01,020 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:01,020 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:09,031 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:16,349 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:16,349 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:39,689 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:47,261 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:47,261 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:02,605 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:02,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:10,351 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:17,935 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:17,935 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:33,308 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:33,308 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,998 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:44,097 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:44,098 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:48,657 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:48,817 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:11,869 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:20,065 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:20,065 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,258 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:50,780 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:50,780 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:06,176 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:06,176 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:12,884 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:21,533 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:21,533 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:43,542 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:52,222 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:52,222 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:07,575 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:07,575 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:14,395 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:22,919 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:22,920 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:38,284 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:38,284 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:44,947 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:53,719 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:53,719 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:09,154 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:09,154 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:15,554 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:24,513 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:24,513 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,048 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:32,049 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:39,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:39,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:46,176 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:55,292 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:55,292 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:10,678 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:10,679 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:16,761 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:26,337 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:26,337 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:41,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:41,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:43,842 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:43,843 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:47,574 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:57,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:57,038 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:12,473 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:12,473 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:18,151 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:27,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:27,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:43,266 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:43,266 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:48,907 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:58,729 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:58,729 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,447 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:03,448 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:14,167 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:14,167 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:19,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:29,519 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:29,520 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:44,877 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:44,877 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:50,128 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:00,259 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:00,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:20,792 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:30,948 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:30,948 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,976 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:38,977 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:46,374 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:46,374 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:51,548 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:01,722 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:01,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:03,261 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:03,262 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:17,072 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:17,072 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:22,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:26:32,410 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:32,411 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:47,810 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:47,810 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:52,753 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,241 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:03,241 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:18,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:18,700 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:23,342 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:34,106 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:34,107 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
--2022-04-09 18:27:39,696 INFO    MainThread:18842 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:27:39,697 INFO    MainThread:18842 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:27:40,003 DEBUG   SenderThread:18842 [sender.py:send():179] send: telemetry
--2022-04-09 18:27:40,004 DEBUG   SenderThread:18842 [sender.py:send():179] send: exit
--2022-04-09 18:27:40,005 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,005 INFO    SenderThread:18842 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:27:40,006 INFO    SenderThread:18842 [sender.py:send_exit():295] send defer
--2022-04-09 18:27:40,006 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,008 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,008 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:27:40,008 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,010 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:27:40,011 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,011 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:40,067 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,067 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:27:40,069 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,069 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:27:40,110 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:40,461 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:27:40,462 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,463 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,464 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:27:40,464 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,465 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,465 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:27:40,466 INFO    SenderThread:18842 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:27:40,566 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:41,202 INFO    SenderThread:18842 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:27:41,205 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt requirements.txt
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log output.log
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:27:41,207 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml config.yaml
--2022-04-09 18:27:41,211 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch diff.patch
--2022-04-09 18:27:41,220 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:27:41,223 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:27:41,224 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,225 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,225 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:27:41,225 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,226 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,226 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:27:41,230 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:41,231 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:27:41,232 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,232 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:27:41,232 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,232 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:27:41,332 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,915 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:27:41,915 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,917 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,917 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:27:41,918 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,919 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:27:41,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,921 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:27:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: final
--2022-04-09 18:27:41,922 DEBUG   SenderThread:18842 [sender.py:send():179] send: footer
--2022-04-09 18:27:41,923 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,923 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:27:42,024 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,024 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,025 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,127 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,129 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,231 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,231 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,233 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,335 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,335 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,336 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,438 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,439 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,440 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,542 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,542 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,544 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,592 INFO    Thread-73 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:27:42,594 INFO    Thread-71 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:27:42,599 INFO    Thread-75 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:42,601 INFO    Thread-72 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:42,602 INFO    Thread-74 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:42,645 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,645 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,646 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,747 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,748 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,749 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,851 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,851 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,852 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:42,853 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,855 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:27:42,857 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:27:42,860 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:27:42,861 INFO    HandlerThread:18842 [handler.py:finish():638] shutting down handler
--2022-04-09 18:27:42,922 INFO    WriterThread:18842 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:27:43,852 INFO    SenderThread:18842 [sender.py:finish():933] shutting down sender
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:27:43,868 INFO    MainThread:18842 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:27:43,884 INFO    MainThread:18842 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-deleted file mode 100644
-index 55b000f..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-+++ /dev/null
-@@ -1,230 +0,0 @@
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'vjrenr4z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml', 'start_method': 'thread'}
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:53,921 INFO    MainThread:18842 [wandb_init.py:init():418] starting backend
--2022-04-09 18:03:53,941 INFO    MainThread:18842 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:03:53,943 INFO    MainThread:18842 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
-diff --git a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb b/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
-deleted file mode 100644
-index 2a205f7..0000000
-Binary files a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py b/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml b/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_182749-paufev36/files/config.yaml b/wandb/run-20220409_182749-paufev36/files/config.yaml
-deleted file mode 100644
-index c4a0d20..0000000
---- a/wandb/run-20220409_182749-paufev36/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_182749-paufev36/files/diff.patch b/wandb/run-20220409_182749-paufev36/files/diff.patch
-deleted file mode 100644
-index 17f6c34..0000000
---- a/wandb/run-20220409_182749-paufev36/files/diff.patch
-+++ /dev/null
-@@ -1,694 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e8bd4e3 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,362 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--+{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--+{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--+{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--+{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--+{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--+{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--+{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--+{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--+{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--+{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--+{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--+{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--+{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--+{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--+{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--+{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--+{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--+{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--+{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--+{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--+{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--+{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--+{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--+{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--+{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--+{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--+{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--+{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--+{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--+{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--+{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--+{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--+{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--+{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--+{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--+{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--+{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--+{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--+{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--+{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--+{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--+{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--+{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--+{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--+{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--+{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..6163657 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..7d0f5dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f11d588 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_182749-paufev36
--\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/output.log b/wandb/run-20220409_182749-paufev36/files/output.log
-deleted file mode 100644
-index 8a30e30..0000000
---- a/wandb/run-20220409_182749-paufev36/files/output.log
-+++ /dev/null
-@@ -1,55 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.115720272064209, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 202.97476196289062, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 151.204345703125, "time": 62}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Exception in thread Thread-16:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220409_182749-paufev36/files/requirements.txt b/wandb/run-20220409_182749-paufev36/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_182749-paufev36/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json b/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-deleted file mode 100644
-index ee6c1fa..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:57:50.039943",
--    "startedAt": "2022-04-09T12:57:49.399103",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json b/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-deleted file mode 100644
-index 6be8521..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 287.689208984375, "_runtime": 137, "_timestamp": 1649509206, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log b/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-deleted file mode 100644
-index ade12de..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-+++ /dev/null
-@@ -1,141 +0,0 @@
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,431 DEBUG   MainThread:25755 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send():179] send: header
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,435 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:49,589 DEBUG   SenderThread:25755 [sender.py:send():179] send: run
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:50,037 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():39] meta init
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():53] meta init done
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:probe():210] probe
--2022-04-09 18:27:50,045 DEBUG   HandlerThread:25755 [meta.py:_setup_git():200] setup git
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_save_code():89] save code
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_code():110] save code done
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_patches():127] save patches
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_pip():57] save pip
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_conda():78] save conda
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:probe():252] probe done
--2022-04-09 18:27:51,519 DEBUG   SenderThread:25755 [sender.py:send():179] send: files
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,530 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:51,530 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:51,872 DEBUG   SenderThread:25755 [sender.py:send():179] send: config
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:06,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:18,996 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,208 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:22,208 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:49,672 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:53,002 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:53,002 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,936 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:00,937 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:08,453 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:08,454 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:20,345 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:23,787 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:23,787 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:39,186 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:39,186 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:51,270 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:54,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:54,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:10,343 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:30:10,343 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug.log b/wandb/run-20220409_182749-paufev36/logs/debug.log
-deleted file mode 100644
-index 7b0f79c..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug.log
-+++ /dev/null
-@@ -1,92 +0,0 @@
--2022-04-09 18:27:49,403 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'paufev36', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-paufev36.yaml', 'start_method': 'thread'}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():418] starting backend
--2022-04-09 18:27:49,427 INFO    MainThread:25755 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:27:49,429 INFO    MainThread:25755 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb b/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
-deleted file mode 100644
-index 70babdb..0000000
-Binary files a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb and /dev/null differ
-diff --git a/wandb/sweep-1t9pc38r/config-paufev36.yaml b/wandb/sweep-1t9pc38r/config-paufev36.yaml
-deleted file mode 100644
-index da3e8b2..0000000
---- a/wandb/sweep-1t9pc38r/config-paufev36.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml b/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-deleted file mode 100644
-index d68afea..0000000
---- a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml b/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-deleted file mode 100644
-index cc3235e..0000000
---- a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml b/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-deleted file mode 100644
-index 24fc0f6..0000000
---- a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml b/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-deleted file mode 100644
-index eeb3936..0000000
---- a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml b/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-deleted file mode 100644
-index f88591e..0000000
---- a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-abict4v2.yaml b/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-deleted file mode 100644
-index 1b97c5e..0000000
---- a/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 20
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml b/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-deleted file mode 100644
-index 426c8ac..0000000
---- a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml b/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-deleted file mode 100644
-index caf5f78..0000000
---- a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml b/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-deleted file mode 100644
-index 6b7d3c1..0000000
---- a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml b/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-deleted file mode 100644
-index 8f11b7e..0000000
---- a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml b/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-deleted file mode 100644
-index d3a2560..0000000
---- a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml b/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-deleted file mode 100644
-index 403014d..0000000
---- a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 512
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml b/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-deleted file mode 100644
-index d1bf3d8..0000000
---- a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 40
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml b/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-deleted file mode 100644
-index 258ae0c..0000000
---- a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml b/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-deleted file mode 100644
-index dbe827a..0000000
---- a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml b/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-deleted file mode 100644
-index 3aeb285..0000000
---- a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml b/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-deleted file mode 100644
-index ccb6734..0000000
---- a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-gjih072d.yaml b/wandb/sweep-yoroy32u/config-gjih072d.yaml
-deleted file mode 100644
-index 73e8e4c..0000000
---- a/wandb/sweep-yoroy32u/config-gjih072d.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml b/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-deleted file mode 100644
-index 9d822c0..0000000
---- a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml b/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-deleted file mode 100644
-index f0bd5df..0000000
---- a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-uh7twoim.yaml b/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-deleted file mode 100644
-index 508d9e2..0000000
---- a/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml b/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-deleted file mode 100644
-index 83311a7..0000000
---- a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml b/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-deleted file mode 100644
-index 4f6dc35..0000000
---- a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--lambd:
--  value: 0.4
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-padai7jf.yaml b/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-deleted file mode 100644
-index 9b19315..0000000
---- a/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--lambd:
--  value: 0.55
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml b/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-deleted file mode 100644
-index 8a8a9b2..0000000
---- a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 256
--epochs:
--  value: 24
--lambd:
--  value: 0.2
--nhead:
--  value: 2
--nlayers:
--  value: 4
diff --git a/wandb/run-20220415_190620-2py0vpvt/files/output.log b/wandb/run-20220415_190620-2py0vpvt/files/output.log
deleted file mode 100644
index ee1cf94..0000000
--- a/wandb/run-20220415_190620-2py0vpvt/files/output.log
+++ /dev/null
@@ -1,77 +0,0 @@
-
-train_translation.py --load=0
-Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
-Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
-- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-{"epoch": 0, "step": 0, "loss": 7.121065616607666, "time": 9}
-{"epoch": 0, "step": 5, "loss": 97.44178771972656, "time": 10}
-/home/ivlabs/context_enhancement/context_new/new/context_enhancement/train_translation.py:275: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
-  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-{"epoch": 0, "step": 10, "loss": 168.33328247070312, "time": 12}
-{"epoch": 0, "step": 15, "loss": 133.17933654785156, "time": 12}
-{"epoch": 0, "step": 20, "loss": 112.3768539428711, "time": 13}
-{"epoch": 0, "step": 25, "loss": 120.29653930664062, "time": 14}
-{"epoch": 0, "step": 30, "loss": 119.97941589355469, "time": 15}
-{"epoch": 0, "step": 35, "loss": 86.40515899658203, "time": 16}
-{"epoch": 0, "step": 40, "loss": 70.5906982421875, "time": 17}
-translation model saved in checkpoint
-Exception in thread Thread-3:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
-    self.run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
-    self._target(*self._args, **self._kwargs)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
-    msg = self._response_queue.get(timeout=1)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
-    res = self._recv_bytes()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
-    buf = self._recv_bytes(maxlength)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
-    buf = self._recv(4)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
-    raise EOFError
-EOFError
-Thread HandlerThread:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
-    self._run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
-    record = self._input_record_q.get(timeout=1)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
-    res = self._recv_bytes()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
-    buf = self._recv_bytes(maxlength)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
-    buf = self._recv(4)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
-    raise EOFError
-EOFError
-[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
-Exception in thread Thread-15:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
-    self.run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
-    self._target(*self._args, **self._kwargs)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
-    status_response = self._interface.communicate_stop_status()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
-    resp = self._communicate(req, timeout=timeout, local=True)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
-    return self._communicate_async(rec, local=local).get(timeout=timeout)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
-    raise Exception("The wandb backend process has shutdown")
-Exception: The wandb backend process has shutdown
-Traceback (most recent call last):
-  File "<string>", line 1, in <module>
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
-    exitcode = _main(fd)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
-    return self._bootstrap()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
-    threading._shutdown()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
-    lock.acquire()
-KeyboardInterrupt
\ No newline at end of file
diff --git a/wandb/run-20220415_190620-2py0vpvt/files/requirements.txt b/wandb/run-20220415_190620-2py0vpvt/files/requirements.txt
deleted file mode 100644
index 5ddce70..0000000
--- a/wandb/run-20220415_190620-2py0vpvt/files/requirements.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-aiohttp==3.8.1
-aiosignal==1.2.0
-antlr4-python3-runtime==4.8
-async-timeout==4.0.2
-asynctest==0.13.0
-attrs==21.4.0
-backcall==0.2.0
-bitarray==2.4.1
-blessings==1.7
-brotlipy==0.7.0
-certifi==2021.10.8
-cffi==1.15.0
-charset-normalizer==2.0.12
-click==8.0.4
-colorama==0.4.4
-configparser==5.2.0
-cryptography==36.0.0
-cython==0.29.28
-datasets==1.16.1
-debugpy==1.6.0
-decorator==5.1.1
-dill==0.3.4
-docker-pycreds==0.4.0
-entrypoints==0.4
-fairseq==1.0.0a0
-fastbpe==0.1.0
-filelock==3.6.0
-frozenlist==1.3.0
-fsspec==2022.2.0
-gitdb==4.0.9
-gitpython==3.1.27
-gpustat==0.6.0
-huggingface-hub==0.4.0
-hydra-core==1.0.7
-idna==3.3
-importlib-metadata==4.11.3
-importlib-resources==5.6.0
-ipykernel==6.12.1
-ipython==7.32.0
-jedi==0.18.1
-joblib==1.1.0
-jupyter-client==7.2.2
-jupyter-core==4.9.2
-matplotlib-inline==0.1.3
-mkl-fft==1.3.1
-mkl-random==1.2.2
-mkl-service==2.4.0
-mock==4.0.3
-multidict==6.0.2
-multiprocess==0.70.12.2
-nest-asyncio==1.5.5
-numpy==1.21.5
-nvidia-ml-py3==7.352.0
-omegaconf==2.0.6
-packaging==21.3
-pandas==1.3.5
-parso==0.8.3
-pathtools==0.1.2
-pexpect==4.8.0
-pickleshare==0.7.5
-pillow==9.0.1
-pip==21.2.2
-portalocker==2.4.0
-promise==2.3
-prompt-toolkit==3.0.29
-protobuf==3.19.4
-psutil==5.9.0
-ptyprocess==0.7.0
-pyarrow==7.0.0
-pycparser==2.21
-pygments==2.11.2
-pyopenssl==22.0.0
-pyparsing==3.0.7
-pysocks==1.7.1
-python-dateutil==2.8.2
-pytz==2022.1
-pyyaml==6.0
-pyzmq==22.3.0
-regex==2022.3.15
-requests==2.27.1
-sacrebleu==2.0.0
-sacremoses==0.0.49
-sentry-sdk==1.5.8
-setuptools==58.0.4
-shortuuid==1.0.8
-six==1.16.0
-smmap==5.0.0
-subprocess32==3.5.4
-subword-nmt==0.3.8
-tabulate==0.8.9
-tokenizers==0.10.3
-torch==1.11.0
-torchaudio==0.11.0
-torchtext==0.12.0
-torchvision==0.12.0
-tornado==6.1
-tqdm==4.63.1
-traitlets==5.1.1
-transformers==4.14.1
-typing-extensions==4.1.1
-urllib3==1.26.9
-wandb==0.10.31
-wcwidth==0.2.5
-wheel==0.37.1
-xxhash==3.0.0
-yarl==1.7.2
-zipp==3.7.0
\ No newline at end of file
diff --git a/wandb/run-20220415_190620-2py0vpvt/files/wandb-metadata.json b/wandb/run-20220415_190620-2py0vpvt/files/wandb-metadata.json
deleted file mode 100644
index 7fdc37d..0000000
--- a/wandb/run-20220415_190620-2py0vpvt/files/wandb-metadata.json
+++ /dev/null
@@ -1,26 +0,0 @@
-{
-    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
-    "python": "3.7.11",
-    "heartbeatAt": "2022-04-15T13:36:21.737888",
-    "startedAt": "2022-04-15T13:36:20.741849",
-    "docker": null,
-    "gpu": "NVIDIA GeForce GTX 1080 Ti",
-    "gpu_count": 2,
-    "cpu_count": 8,
-    "cuda": null,
-    "args": [
-        "--load=0"
-    ],
-    "state": "running",
-    "program": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement/train_translation.py",
-    "codePath": "train_translation.py",
-    "git": {
-        "remote": "https://github.com/IvLabs/context_enhancement.git",
-        "commit": "3f7c03274d50f816db3079adcb4d4125620373b6"
-    },
-    "email": "aneeshashetye@gmail.com",
-    "root": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement",
-    "host": "hubble-02",
-    "username": "ivlabs",
-    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
-}
diff --git a/wandb/run-20220415_190620-2py0vpvt/files/wandb-summary.json b/wandb/run-20220415_190620-2py0vpvt/files/wandb-summary.json
deleted file mode 100644
index 6c757d0..0000000
--- a/wandb/run-20220415_190620-2py0vpvt/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"epoch_loss": 113.57089103062948, "_runtime": 35, "_timestamp": 1650029815, "_step": 0}
\ No newline at end of file
diff --git a/wandb/run-20220415_190620-2py0vpvt/logs/debug-internal.log b/wandb/run-20220415_190620-2py0vpvt/logs/debug-internal.log
deleted file mode 100644
index 896a0da..0000000
--- a/wandb/run-20220415_190620-2py0vpvt/logs/debug-internal.log
+++ /dev/null
@@ -1,118 +0,0 @@
-2022-04-15 19:06:20,774 INFO    wandb_internal:5906 [internal.py:wandb_internal():91] W&B internal server running at pid: 5906, started at: 2022-04-15 19:06:20.773660
-2022-04-15 19:06:20,798 INFO    MainThread:5906 [wandb_init.py:init():423] backend started and connected
-2022-04-15 19:06:20,798 DEBUG   MainThread:5906 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
-2022-04-15 19:06:20,799 INFO    MainThread:5906 [wandb_init.py:init():465] updated telemetry
-2022-04-15 19:06:20,809 INFO    MainThread:5906 [wandb_init.py:init():484] communicating current version
-2022-04-15 19:06:20,822 DEBUG   HandlerThread:5906 [handler.py:handle_request():124] handle_request: check_version
-2022-04-15 19:06:20,823 DEBUG   SenderThread:5906 [sender.py:send():179] send: header
-2022-04-15 19:06:20,822 INFO    WriterThread:5906 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/run-2py0vpvt.wandb
-2022-04-15 19:06:20,824 DEBUG   SenderThread:5906 [sender.py:send_request():193] send_request: check_version
-2022-04-15 19:06:21,045 INFO    MainThread:5906 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-15 19:06:21,045 INFO    MainThread:5906 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-15 19:06:21,046 DEBUG   SenderThread:5906 [sender.py:send():179] send: run
-2022-04-15 19:06:21,723 INFO    MainThread:5906 [wandb_init.py:init():522] starting run threads in backend
-2022-04-15 19:06:21,723 DEBUG   HandlerThread:5906 [handler.py:handle_request():124] handle_request: run_start
-2022-04-15 19:06:21,737 DEBUG   HandlerThread:5906 [meta.py:__init__():39] meta init
-2022-04-15 19:06:21,737 DEBUG   HandlerThread:5906 [meta.py:__init__():53] meta init done
-2022-04-15 19:06:21,737 DEBUG   HandlerThread:5906 [meta.py:probe():210] probe
-2022-04-15 19:06:21,744 DEBUG   HandlerThread:5906 [meta.py:_setup_git():200] setup git
-2022-04-15 19:06:21,781 INFO    SenderThread:5906 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files
-2022-04-15 19:06:21,782 INFO    SenderThread:5906 [sender.py:_start_run_threads():707] run started: 2py0vpvt with start time 1650029780
-2022-04-15 19:06:21,782 DEBUG   SenderThread:5906 [sender.py:send():179] send: summary
-2022-04-15 19:06:21,782 INFO    SenderThread:5906 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-15 19:06:21,821 DEBUG   HandlerThread:5906 [meta.py:_setup_git():207] setup git done
-2022-04-15 19:06:21,821 DEBUG   HandlerThread:5906 [meta.py:_save_code():89] save code
-2022-04-15 19:06:21,858 DEBUG   HandlerThread:5906 [meta.py:_save_code():110] save code done
-2022-04-15 19:06:21,858 DEBUG   HandlerThread:5906 [meta.py:_save_patches():127] save patches
-2022-04-15 19:06:22,072 DEBUG   HandlerThread:5906 [meta.py:_save_patches():169] save patches done
-2022-04-15 19:06:22,072 DEBUG   HandlerThread:5906 [meta.py:_save_pip():57] save pip
-2022-04-15 19:06:22,073 DEBUG   HandlerThread:5906 [meta.py:_save_pip():71] save pip done
-2022-04-15 19:06:22,073 DEBUG   HandlerThread:5906 [meta.py:_save_conda():78] save conda
-2022-04-15 19:06:22,769 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/requirements.txt
-2022-04-15 19:06:22,770 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/code/train_translation.py
-2022-04-15 19:06:22,770 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/diff.patch
-2022-04-15 19:06:22,770 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/wandb-summary.json
-2022-04-15 19:06:22,770 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/conda-environment.yaml
-2022-04-15 19:06:22,770 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/code
-2022-04-15 19:06:25,546 DEBUG   HandlerThread:5906 [meta.py:_save_conda():86] save conda done
-2022-04-15 19:06:25,546 DEBUG   HandlerThread:5906 [meta.py:probe():252] probe done
-2022-04-15 19:06:25,549 DEBUG   SenderThread:5906 [sender.py:send():179] send: files
-2022-04-15 19:06:25,549 INFO    SenderThread:5906 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-15 19:06:25,549 INFO    SenderThread:5906 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-15 19:06:25,551 INFO    SenderThread:5906 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-15 19:06:25,581 DEBUG   HandlerThread:5906 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:06:25,581 DEBUG   SenderThread:5906 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:06:25,593 INFO    MainThread:5906 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-15 19:06:25,594 INFO    MainThread:5906 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-15 19:06:25,594 INFO    MainThread:5906 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-15 19:06:25,633 INFO    MainThread:5906 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-15 19:06:25,633 INFO    MainThread:5906 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-15 19:06:25,634 INFO    MainThread:5906 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
-2022-04-15 19:06:25,769 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/conda-environment.yaml
-2022-04-15 19:06:25,769 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:25,769 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/wandb-metadata.json
-2022-04-15 19:06:26,074 DEBUG   SenderThread:5906 [sender.py:send():179] send: config
-2022-04-15 19:06:26,807 INFO    Thread-14 :5906 [upload_job.py:push():133] Uploaded file /tmp/tmpbd0vash4wandb/373ehk48-wandb-metadata.json
-2022-04-15 19:06:26,833 INFO    Thread-16 :5906 [upload_job.py:push():133] Uploaded file /tmp/tmpbd0vash4wandb/16e4mjp9-code/train_translation.py
-2022-04-15 19:06:27,769 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/config.yaml
-2022-04-15 19:06:27,769 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:29,014 INFO    Thread-18 :5906 [upload_job.py:push():133] Uploaded file /tmp/tmpbd0vash4wandb/2zz8ar1z-diff.patch
-2022-04-15 19:06:29,770 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:33,774 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:41,075 DEBUG   HandlerThread:5906 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:06:41,076 DEBUG   SenderThread:5906 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:06:47,843 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:49,844 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:50,925 DEBUG   SenderThread:5906 [sender.py:send():179] send: stats
-2022-04-15 19:06:51,845 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:53,845 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:55,022 DEBUG   SenderThread:5906 [sender.py:send():179] send: history
-2022-04-15 19:06:55,022 DEBUG   SenderThread:5906 [sender.py:send():179] send: summary
-2022-04-15 19:06:55,023 INFO    SenderThread:5906 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-15 19:06:55,851 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:55,851 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/wandb-summary.json
-2022-04-15 19:06:56,825 DEBUG   HandlerThread:5906 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:06:56,825 DEBUG   SenderThread:5906 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:07:11,877 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:07:12,360 DEBUG   HandlerThread:5906 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:07:12,361 DEBUG   SenderThread:5906 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:07:19,703 INFO    WriterThread:5906 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/run-2py0vpvt.wandb
-2022-04-15 19:07:19,860 INFO    SenderThread:5906 [sender.py:finish():933] shutting down sender
-2022-04-15 19:07:19,860 INFO    SenderThread:5906 [dir_watcher.py:finish():282] shutting down directory watcher
-2022-04-15 19:07:19,879 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:07:19,880 INFO    SenderThread:5906 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files
-2022-04-15 19:07:19,880 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/requirements.txt requirements.txt
-2022-04-15 19:07:19,880 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/wandb-metadata.json wandb-metadata.json
-2022-04-15 19:07:19,880 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log output.log
-2022-04-15 19:07:19,880 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/conda-environment.yaml conda-environment.yaml
-2022-04-15 19:07:19,880 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/wandb-summary.json wandb-summary.json
-2022-04-15 19:07:19,880 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/config.yaml config.yaml
-2022-04-15 19:07:19,881 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/diff.patch diff.patch
-2022-04-15 19:07:19,881 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/code/train_translation.py code/train_translation.py
-2022-04-15 19:07:19,881 INFO    SenderThread:5906 [file_pusher.py:finish():176] shutting down file pusher
-2022-04-15 19:07:19,881 INFO    SenderThread:5906 [file_pusher.py:join():181] waiting for file pusher
-2022-04-15 19:07:21,094 INFO    Thread-25 :5906 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/requirements.txt
-2022-04-15 19:07:21,208 INFO    Thread-29 :5906 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/config.yaml
-2022-04-15 19:07:21,219 INFO    Thread-26 :5906 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:07:21,814 INFO    Thread-27 :5906 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/conda-environment.yaml
-2022-04-15 19:07:22,524 INFO    Thread-28 :5906 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/wandb-summary.json
-2022-04-15 19:07:23,194 ERROR   wandb_internal:5906 [internal.py:wandb_internal():159] Thread HandlerThread:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
-    self._run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
-    record = self._input_record_q.get(timeout=1)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
-    res = self._recv_bytes()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
-    buf = self._recv_bytes(maxlength)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
-    buf = self._recv(4)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
-    raise EOFError
-EOFError
-2022-04-15 19:34:32,989 INFO    MainThread:5906 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
-2022-04-15 19:34:32,989 INFO    MainThread:5906 [wandb_run.py:_restore():1480] restore
-2022-04-15 19:34:33,088 INFO    MainThread:5906 [wandb_run.py:_restore():1480] restore
-2022-04-15 19:34:33,091 INFO    MainThread:5906 [internal.py:handle_exit():78] Internal process exited
diff --git a/wandb/run-20220415_190620-2py0vpvt/logs/debug.log b/wandb/run-20220415_190620-2py0vpvt/logs/debug.log
deleted file mode 100644
index a71d0fa..0000000
--- a/wandb/run-20220415_190620-2py0vpvt/logs/debug.log
+++ /dev/null
@@ -1,94 +0,0 @@
-2022-04-15 19:06:20,743 INFO    MainThread:5906 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
-2022-04-15 19:06:20,743 INFO    MainThread:5906 [wandb_setup.py:_flush():69] setting login settings: {}
-2022-04-15 19:06:20,743 INFO    MainThread:5906 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/logs/debug.log
-2022-04-15 19:06:20,743 INFO    MainThread:5906 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/logs/debug-internal.log
-2022-04-15 19:06:20,743 INFO    MainThread:5906 [wandb_init.py:init():369] calling init triggers
-2022-04-15 19:06:20,743 INFO    MainThread:5906 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
-config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
-2022-04-15 19:06:20,743 INFO    MainThread:5906 [wandb_init.py:init():418] starting backend
-2022-04-15 19:06:20,751 INFO    MainThread:5906 [backend.py:ensure_launched():132] starting backend process...
-2022-04-15 19:06:20,773 INFO    MainThread:5906 [backend.py:ensure_launched():137] started backend process with pid: 0
-2022-04-15 19:06:20,774 INFO    wandb_internal:5906 [internal.py:wandb_internal():91] W&B internal server running at pid: 5906, started at: 2022-04-15 19:06:20.773660
-2022-04-15 19:06:20,798 INFO    MainThread:5906 [wandb_init.py:init():423] backend started and connected
-2022-04-15 19:06:20,799 INFO    MainThread:5906 [wandb_init.py:init():465] updated telemetry
-2022-04-15 19:06:20,809 INFO    MainThread:5906 [wandb_init.py:init():484] communicating current version
-2022-04-15 19:06:20,822 INFO    WriterThread:5906 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/run-2py0vpvt.wandb
-2022-04-15 19:06:21,045 INFO    MainThread:5906 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-15 19:06:21,045 INFO    MainThread:5906 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-15 19:06:21,723 INFO    MainThread:5906 [wandb_init.py:init():522] starting run threads in backend
-2022-04-15 19:06:21,781 INFO    SenderThread:5906 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files
-2022-04-15 19:06:21,782 INFO    SenderThread:5906 [sender.py:_start_run_threads():707] run started: 2py0vpvt with start time 1650029780
-2022-04-15 19:06:21,782 INFO    SenderThread:5906 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-15 19:06:22,769 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/requirements.txt
-2022-04-15 19:06:22,770 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/code/train_translation.py
-2022-04-15 19:06:22,770 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/diff.patch
-2022-04-15 19:06:22,770 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/wandb-summary.json
-2022-04-15 19:06:22,770 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/conda-environment.yaml
-2022-04-15 19:06:22,770 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/code
-2022-04-15 19:06:25,549 INFO    SenderThread:5906 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-15 19:06:25,549 INFO    SenderThread:5906 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-15 19:06:25,551 INFO    SenderThread:5906 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-15 19:06:25,593 INFO    MainThread:5906 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-15 19:06:25,594 INFO    MainThread:5906 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-15 19:06:25,594 INFO    MainThread:5906 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-15 19:06:25,633 INFO    MainThread:5906 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-15 19:06:25,633 INFO    MainThread:5906 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-15 19:06:25,634 INFO    MainThread:5906 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
-2022-04-15 19:06:25,769 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/conda-environment.yaml
-2022-04-15 19:06:25,769 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:25,769 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/wandb-metadata.json
-2022-04-15 19:06:26,807 INFO    Thread-14 :5906 [upload_job.py:push():133] Uploaded file /tmp/tmpbd0vash4wandb/373ehk48-wandb-metadata.json
-2022-04-15 19:06:26,833 INFO    Thread-16 :5906 [upload_job.py:push():133] Uploaded file /tmp/tmpbd0vash4wandb/16e4mjp9-code/train_translation.py
-2022-04-15 19:06:27,769 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/config.yaml
-2022-04-15 19:06:27,769 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:29,014 INFO    Thread-18 :5906 [upload_job.py:push():133] Uploaded file /tmp/tmpbd0vash4wandb/2zz8ar1z-diff.patch
-2022-04-15 19:06:29,770 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:33,774 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:47,843 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:49,844 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:51,845 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:53,845 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:55,023 INFO    SenderThread:5906 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-15 19:06:55,851 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:06:55,851 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/wandb-summary.json
-2022-04-15 19:07:11,877 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:07:19,703 INFO    WriterThread:5906 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/run-2py0vpvt.wandb
-2022-04-15 19:07:19,860 INFO    SenderThread:5906 [sender.py:finish():933] shutting down sender
-2022-04-15 19:07:19,860 INFO    SenderThread:5906 [dir_watcher.py:finish():282] shutting down directory watcher
-2022-04-15 19:07:19,879 INFO    Thread-12 :5906 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:07:19,880 INFO    SenderThread:5906 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files
-2022-04-15 19:07:19,880 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/requirements.txt requirements.txt
-2022-04-15 19:07:19,880 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/wandb-metadata.json wandb-metadata.json
-2022-04-15 19:07:19,880 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log output.log
-2022-04-15 19:07:19,880 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/conda-environment.yaml conda-environment.yaml
-2022-04-15 19:07:19,880 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/wandb-summary.json wandb-summary.json
-2022-04-15 19:07:19,880 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/config.yaml config.yaml
-2022-04-15 19:07:19,881 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/diff.patch diff.patch
-2022-04-15 19:07:19,881 INFO    SenderThread:5906 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/code/train_translation.py code/train_translation.py
-2022-04-15 19:07:19,881 INFO    SenderThread:5906 [file_pusher.py:finish():176] shutting down file pusher
-2022-04-15 19:07:19,881 INFO    SenderThread:5906 [file_pusher.py:join():181] waiting for file pusher
-2022-04-15 19:07:21,094 INFO    Thread-25 :5906 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/requirements.txt
-2022-04-15 19:07:21,208 INFO    Thread-29 :5906 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/config.yaml
-2022-04-15 19:07:21,219 INFO    Thread-26 :5906 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/output.log
-2022-04-15 19:07:21,814 INFO    Thread-27 :5906 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/conda-environment.yaml
-2022-04-15 19:07:22,524 INFO    Thread-28 :5906 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_190620-2py0vpvt/files/wandb-summary.json
-2022-04-15 19:07:23,194 ERROR   wandb_internal:5906 [internal.py:wandb_internal():159] Thread HandlerThread:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
-    self._run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
-    record = self._input_record_q.get(timeout=1)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
-    res = self._recv_bytes()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
-    buf = self._recv_bytes(maxlength)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
-    buf = self._recv(4)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
-    raise EOFError
-EOFError
-2022-04-15 19:34:32,989 INFO    MainThread:5906 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
-2022-04-15 19:34:32,989 INFO    MainThread:5906 [wandb_run.py:_restore():1480] restore
-2022-04-15 19:34:33,088 INFO    MainThread:5906 [wandb_run.py:_restore():1480] restore
-2022-04-15 19:34:33,091 INFO    MainThread:5906 [internal.py:handle_exit():78] Internal process exited
diff --git a/wandb/run-20220415_190620-2py0vpvt/run-2py0vpvt.wandb b/wandb/run-20220415_190620-2py0vpvt/run-2py0vpvt.wandb
deleted file mode 100644
index 30e91cb..0000000
Binary files a/wandb/run-20220415_190620-2py0vpvt/run-2py0vpvt.wandb and /dev/null differ
diff --git a/wandb/run-20220415_193521-231emzap/files/code/train_translation.py b/wandb/run-20220415_193521-231emzap/files/code/train_translation.py
deleted file mode 100644
index c6ab0ef..0000000
--- a/wandb/run-20220415_193521-231emzap/files/code/train_translation.py
+++ /dev/null
@@ -1,400 +0,0 @@
-import numpy as np
-from pathlib import Path
-import argparse
-import json
-import math
-import os
-import random
-import signal
-import subprocess
-import sys
-import time
-
-import torch
-from torch import nn, optim 
-from torch.nn import Transformer 
-import torchtext
-import t_dataset
-from t_dataset import  Translation_dataset_t
-from t_dataset import  MyCollate
-import translation_utils 
-from translation_utils import TokenEmbedding, PositionalEncoding 
-from translation_utils import create_mask
-from transformers import BertModel 
-from transformers import AutoTokenizer
-from torch import Tensor
-from torchtext.data.metrics import bleu_score
-from models import Translator
-from models import BarlowTwins
-
-import wandb 
-
-
-#import barlow
-os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
-os.environ['WANDB_START_METHOD'] = 'thread'
-os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-
-MANUAL_SEED = 4444
-
-random.seed(MANUAL_SEED)
-np.random.seed(MANUAL_SEED)
-torch.manual_seed(MANUAL_SEED)
-torch.backends.cudnn.deterministic = True
-
-
-parser = argparse.ArgumentParser(description = 'Translation') 
-
-# Training hyper-parameters: 
-parser.add_argument('--workers', default=4, type=int, metavar='N', 
-                    help='number of data loader workers') 
-parser.add_argument('--epochs', default=5, type=int, metavar='N',
-                    help='number of total epochs to run')
-parser.add_argument('--batch_size', default=4, type=int, metavar='n',
-                    help='mini-batch size')
-parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
-                    help='base learning rate')
-parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
-                    help='dropout for training translation transformer')
-parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
-                    help='weight decay')
-parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
-                    help='momentum for sgd')
-parser.add_argument('--clip', default=1, type=float, metavar='GC',
-                    help='Gradient Clipping')
-parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
-                    help='betas for Adam Optimizer')
-parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
-                    help='eps for Adam optimizer')
-parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
-                    help='loss function for translation')
-parser.add_argument('--optimizer', default='adam', type=str, metavar='OP',
-                    help='selecting optimizer')
-
-# Transformer parameters: 
-parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
-                    help='dimension of transformer encoder')
-parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                    help= 'number of heads in transformer') 
-parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                    help= 'dimension of feedforward layer in transformer encoder') 
-parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                   help='number of layers of transformer encoder') 
-parser.add_argument('--projector', default='768-256', type=str,
-                    metavar='MLP', help='projector MLP')
-
-# Tokenizer: 
-parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
-                metavar='T', help= 'tokenizer')
-parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
-                    help='Dimension of mbert output')
-# Paths: 
-parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
-                    metavar='DIR', help='path to checkpoint directory')
-
-# to load or barlow or not: 
-parser.add_argument('--load', default=0, type=int,
-                    metavar='DIR', help='to load barlow twins encoder or not')
-
-# calculate bleu: 
-parser.add_argument('--checkbleu', default=5 , type=int,
-                    metavar='BL', help='check bleu after these number of epochs')
-# train or test dataset
-parser.add_argument('--train', default=True , type=bool,
-                    metavar='T', help='selecting train set')
-
-parser.add_argument('--print_freq', default=5 , type=int,
-                    metavar='PF', help='frequency of printing and saving stats')
-
-parser.add_argument('--test_translation', default=0, type=int, 
-                    metavar='TT', help='testing translation_score')
-''' NOTE: 
-        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-'''
-
-args = parser.parse_args()
-# print(args.load)
-os.environ["TOKENIZERS_PARALLELISM"] = "true"
-
-def main(): 
-
-    # print("entered main")
-    args.ngpus_per_node = torch.cuda.device_count()
-    if 'SLURM_JOB_ID' in os.environ:
-        # single-node and multi-node distributed training on SLURM cluster
-        # requeue job on SLURM preemption
-        signal.signal(signal.SIGUSR1, handle_sigusr1)
-        signal.signal(signal.SIGTERM, handle_sigterm)
-        # find a common host name on all nodes
-        # assume scontrol returns hosts in the same order on all nodes
-        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
-        stdout = subprocess.check_output(cmd.split())
-        host_name = stdout.decode().splitlines()[0]
-        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
-        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
-        args.dist_url = f'tcp://{host_name}:58472'
-    else:
-        # single-node distributed training
-        args.rank = 0
-        args.dist_url = 'tcp://localhost:58472'
-        args.world_size = args.ngpus_per_node
-    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
-
-
-def main_worker(gpu, args):
-    
-    args.rank += gpu
-    torch.distributed.init_process_group(
-        backend='nccl', init_method=args.dist_url,
-        world_size=args.world_size, rank=args.rank)
-
-    if args.rank == 0:
-
-        wandb.init(config=args, project='translation_test')#############################################
-        wandb.config.update(args)
-        config = wandb.config
-    
-        # exit()
-        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
-        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
-        print(' '.join(sys.argv))
-        print(' '.join(sys.argv), file=stats_file)
-
-    torch.cuda.set_device(gpu)
-    torch.backends.cudnn.benchmark = True
-
-    dataset = Translation_dataset_t(train=args.train) 
-    src_vocab_size = dataset.de_vocab_size
-    trg_vocab_size = dataset.en_vocab_size
-    tokenizer = dataset.tokenizer  
-    pad_idx = tokenizer.pad_token_id
-    sos_idx = tokenizer.cls_token_id 
-    eos_idx = tokenizer.sep_token_id
-
-#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
-    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
-    # print(src_vocab_size, trg_vocab_size)
-    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
-    transformer = Transformer(d_model=args.dmodel, 
-                              nhead=args.nhead, 
-                              num_encoder_layers=args.nlayers, 
-                              num_decoder_layers = args.nlayers, 
-                              dim_feedforward=args.dfeedforward, 
-                              dropout=args.dropout)
-    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
-    # print(model.state_dict)
-#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
-
-    # args.load = False
-
-    if args.load == 1: 
-        # print(args.load)
-        # print('inside')
-        print('loading barlow model')
-        t_enc = model.transformer.encoder
-        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
-        ### note: lambd is just a placeholder
-        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
-                            map_location='cpu')
-        barlow.load_state_dict(ckpt['model'])
-        model.transformer.encoder = barlow.transformer_enc        
-        model.mbert = barlow.mbert
-    '''
-    to_do: 
-    if post_train: 
-        torch.load(model.states_dict)
-        model.transformer.encoder = model_barlow
-
-    '''
-#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
-
-    param_weights = []
-    param_biases = []
-    for param in model.parameters():
-        if param.ndim == 1:
-            param_biases.append(param)
-        else:
-            param_weights.append(param)
-    parameters = [{'params': param_weights}, {'params': param_biases}]
-    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
-
-###########################################################
-    if args.optimizer == 'adam':
-        optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
-    else: 
-        optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) 
-    
-    if args.loss_fn == 'cross_entropy': 
-        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
-##############################################################
-
-    start_epoch = 0 
-
-    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
-
-    assert args.batch_size % args.world_size == 0
-    per_device_batch_size = args.batch_size // args.world_size
-    id2bert_dict = dataset.id2bert_dict
-    ###############################
-    loader = torch.utils.data.DataLoader(
-         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-   
-    test_loader = torch.utils.data.DataLoader(
-         dataset, batch_size=1, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-    #############################
-    start_time = time.time()
-
-
-    if not args.test_translation: 
-
-        for epoch in range(start_epoch, args.epochs):
-            sampler.set_epoch(epoch)
-            epoch_loss = 0 
-            t = 0 
-            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
-                src = sent[0].cuda(gpu, non_blocking=True)
-                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
-                tgt_out = sent[3].cuda(gpu, non_blocking=True)
-                
-                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
-                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
-                
-                optimizer.zero_grad()
-
-                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
-                loss.backward()
-
-                optimizer.step()
-                # losses += loss.item()
-                
-                # wandb.log({'iter_loss': loss})
-                epoch_loss += loss.item()
-                t += 1 
-                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-                
-                if step % args.print_freq == 0:
-                    if args.rank == 0:
-                        stats = dict(epoch=epoch, step=step,
-                                    loss=loss.item(),
-                                    time=int(time.time() - start_time))
-                        print(json.dumps(stats))
-                        print(json.dumps(stats), file=stats_file)
-            if args.rank == 0:
-
-                wandb.log({"epoch_loss":epoch_loss/t})
-                # save checkpoint
-                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
-                            optimizer=optimizer.state_dict())
-                # print(model.state_dict)
-                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
-                print('translation model saved in', args.checkpoint_dir)
-            
-    ##############################################################
-            if args.rank == 0: 
-                if epoch%args.checkbleu ==0 : 
-
-                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                    wandb.log({'bleu_score': bleu_score}) 
-    #            print(bleu_score(predicted, target))
-    ##############################################################
-    #        if epoch%1 ==0 : 
-    #            torch.save(model.module.state_dict(),
-    #                   'path.pth')
-    #            print("Model is saved")
-            # if args.rank == 0:
-            #     # save checkpoint
-            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
-            #                  optimizer=optimizer.state_dict())
-            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
-            #     print('saved translation model in', args.checkpoint_dir)
-        wandb.finish()
-            
-    else: 
-
-        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-        print('test_bleu_score', bleu_score)
-        if args.rank == 0: 
-            wandb.log({'bleu_score': bleu_score})
-
-
-def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
-
-    model.eval()
-    predicted=[]
-    target=[]
-            
-    for i in test_loader: 
-        src = i[0].cuda(gpu, non_blocking=True)
-        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-        num_tokens = src.shape[0]
-
-        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
-        out = translate(model, src, tokenizer, src_mask, id2bert, gpu)
-        predicted.append(out)
-        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-        print(out)
-        print(tokenizer.convert_ids_to_tokens(tgt_out))
-                
-        try: 
-            bleu_score(predicted, target)
-        except: 
-            predicted.pop()
-            target.pop()
-            
-        bleu = bleu_score(predicted, target)
-
-    return bleu
-
-'''
-todo: 
-    BLEU score
-'''
-
-# function to generate output sequence using greedy algorithm 
-def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
-    src = src
-    src_mask = src_mask
-
-    memory = model.module.encode(src, src_mask)
-    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
-    for i in range(max_len-1):
-        memory = memory
-        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
-                    .type(torch.bool)).cuda(gpu, non_blocking=True)
-        out = model.module.decode(ys, memory, tgt_mask)
-        out = out.transpose(0, 1)
-        prob = model.module.generator(out[:, -1])
-        _, next_word = torch.max(prob, dim=1)
-        next_word = next_word.item()
-
-        ys = torch.cat([ys,
-                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
-        if next_word == eos_idx:
-            break
-    return ys
-
-
-# actual function to translate input sentence into target language
-def translate(model: torch.nn.Module, 
-        src: torch.tensor, 
-        tokenizer,src_mask, id2bert, gpu):
-    model.eval()
-    
-    num_tokens = src.shape[0]
-    
-    
-    tgt_tokens = greedy_decode(
-        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-    
-#    for i in len(tgt_tokens): 
-#        tgt_tokens[i] = id2bert[tgt_tokens[i]]
-#    print(tgt_tokens)
-
-    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
-
-
-if __name__ == '__main__': 
-    main()
-    wandb.finish()
diff --git a/wandb/run-20220415_193521-231emzap/files/conda-environment.yaml b/wandb/run-20220415_193521-231emzap/files/conda-environment.yaml
deleted file mode 100644
index fd74d2b..0000000
--- a/wandb/run-20220415_193521-231emzap/files/conda-environment.yaml
+++ /dev/null
@@ -1,158 +0,0 @@
-name: ectc
-channels:
-  - pytorch
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=4.5=1_gnu
-  - blas=1.0=mkl
-  - brotlipy=0.7.0=py37h27cfd23_1003
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2022.3.18=h06a4308_0
-  - certifi=2021.10.8=py37h06a4308_2
-  - cffi=1.15.0=py37hd667e15_1
-  - cryptography=36.0.0=py37h9ce1e76_0
-  - cudatoolkit=11.3.1=h2bc3f7f_2
-  - ffmpeg=4.3=hf484d3e_0
-  - freetype=2.11.0=h70c0345_0
-  - giflib=5.2.1=h7b6447c_0
-  - gmp=6.2.1=h2531618_2
-  - gnutls=3.6.15=he1e5248_0
-  - idna=3.3=pyhd3eb1b0_0
-  - intel-openmp=2021.4.0=h06a4308_3561
-  - jpeg=9d=h7f8727e_0
-  - lame=3.100=h7b6447c_0
-  - lcms2=2.12=h3be6417_0
-  - ld_impl_linux-64=2.35.1=h7274673_9
-  - libffi=3.3=he6710b0_2
-  - libgcc-ng=9.3.0=h5101ec6_17
-  - libgomp=9.3.0=h5101ec6_17
-  - libiconv=1.15=h63c8f33_5
-  - libidn2=2.3.2=h7f8727e_0
-  - libpng=1.6.37=hbc83047_0
-  - libstdcxx-ng=9.3.0=hd4cf53a_17
-  - libtasn1=4.16.0=h27cfd23_0
-  - libtiff=4.2.0=h85742a9_0
-  - libunistring=0.9.10=h27cfd23_0
-  - libuv=1.40.0=h7b6447c_0
-  - libwebp=1.2.2=h55f646e_0
-  - libwebp-base=1.2.2=h7f8727e_0
-  - lz4-c=1.9.3=h295c915_1
-  - mkl=2021.4.0=h06a4308_640
-  - mkl-service=2.4.0=py37h7f8727e_0
-  - mkl_fft=1.3.1=py37hd3c417c_0
-  - mkl_random=1.2.2=py37h51133e4_0
-  - ncurses=6.3=h7f8727e_2
-  - nettle=3.7.3=hbbd107a_1
-  - numpy-base=1.21.2=py37h79a1101_0
-  - openh264=2.1.1=h4ff587b_0
-  - openssl=1.1.1n=h7f8727e_0
-  - pip=21.2.2=py37h06a4308_0
-  - pycparser=2.21=pyhd3eb1b0_0
-  - pyopenssl=22.0.0=pyhd3eb1b0_0
-  - pysocks=1.7.1=py37_1
-  - python=3.7.11=h12debd9_0
-  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
-  - pytorch-mutex=1.0=cuda
-  - readline=8.1.2=h7f8727e_1
-  - requests=2.27.1=pyhd3eb1b0_0
-  - setuptools=58.0.4=py37h06a4308_0
-  - six=1.16.0=pyhd3eb1b0_1
-  - sqlite=3.38.0=hc218d9a_0
-  - tk=8.6.11=h1ccaba5_0
-  - torchaudio=0.11.0=py37_cu113
-  - typing_extensions=4.1.1=pyh06a4308_0
-  - wheel=0.37.1=pyhd3eb1b0_0
-  - xz=5.2.5=h7b6447c_0
-  - zlib=1.2.11=h7f8727e_4
-  - zstd=1.4.9=haebb681_0
-  - pip:
-    - aiohttp==3.8.1
-    - aiosignal==1.2.0
-    - antlr4-python3-runtime==4.8
-    - async-timeout==4.0.2
-    - asynctest==0.13.0
-    - attrs==21.4.0
-    - backcall==0.2.0
-    - bitarray==2.4.1
-    - blessings==1.7
-    - charset-normalizer==2.0.12
-    - click==8.0.4
-    - colorama==0.4.4
-    - configparser==5.2.0
-    - cython==0.29.28
-    - datasets==1.16.1
-    - debugpy==1.6.0
-    - decorator==5.1.1
-    - dill==0.3.4
-    - docker-pycreds==0.4.0
-    - entrypoints==0.4
-    - fastbpe==0.1.0
-    - filelock==3.6.0
-    - frozenlist==1.3.0
-    - fsspec==2022.2.0
-    - gitdb==4.0.9
-    - gitpython==3.1.27
-    - gpustat==0.6.0
-    - huggingface-hub==0.4.0
-    - hydra-core==1.0.7
-    - importlib-metadata==4.11.3
-    - importlib-resources==5.6.0
-    - ipykernel==6.12.1
-    - ipython==7.32.0
-    - jedi==0.18.1
-    - joblib==1.1.0
-    - jupyter-client==7.2.2
-    - jupyter-core==4.9.2
-    - matplotlib-inline==0.1.3
-    - mock==4.0.3
-    - multidict==6.0.2
-    - multiprocess==0.70.12.2
-    - nest-asyncio==1.5.5
-    - numpy==1.21.5
-    - nvidia-ml-py3==7.352.0
-    - omegaconf==2.0.6
-    - packaging==21.3
-    - pandas==1.3.5
-    - parso==0.8.3
-    - pathtools==0.1.2
-    - pexpect==4.8.0
-    - pickleshare==0.7.5
-    - pillow==9.0.1
-    - portalocker==2.4.0
-    - promise==2.3
-    - prompt-toolkit==3.0.29
-    - protobuf==3.19.4
-    - psutil==5.9.0
-    - ptyprocess==0.7.0
-    - pyarrow==7.0.0
-    - pygments==2.11.2
-    - pyparsing==3.0.7
-    - python-dateutil==2.8.2
-    - pytz==2022.1
-    - pyyaml==6.0
-    - pyzmq==22.3.0
-    - regex==2022.3.15
-    - sacrebleu==2.0.0
-    - sacremoses==0.0.49
-    - sentry-sdk==1.5.8
-    - shortuuid==1.0.8
-    - smmap==5.0.0
-    - subprocess32==3.5.4
-    - subword-nmt==0.3.8
-    - tabulate==0.8.9
-    - tokenizers==0.10.3
-    - torch==1.11.0
-    - torchtext==0.12.0
-    - torchvision==0.9.1
-    - tornado==6.1
-    - tqdm==4.63.1
-    - traitlets==5.1.1
-    - transformers==4.14.1
-    - urllib3==1.26.9
-    - wandb==0.10.31
-    - wcwidth==0.2.5
-    - xxhash==3.0.0
-    - yarl==1.7.2
-    - zipp==3.7.0
-prefix: /home/ivlabs/miniconda3/envs/ectc
diff --git a/wandb/run-20220415_193521-231emzap/files/config.yaml b/wandb/run-20220415_193521-231emzap/files/config.yaml
deleted file mode 100644
index 4ed8c75..0000000
--- a/wandb/run-20220415_193521-231emzap/files/config.yaml
+++ /dev/null
@@ -1,110 +0,0 @@
-wandb_version: 1
-
-_wandb:
-  desc: null
-  value:
-    cli_version: 0.10.31
-    code_path: code/train_translation.py
-    framework: huggingface
-    huggingface_version: 4.14.1
-    is_jupyter_run: false
-    is_kaggle_kernel: false
-    python_version: 3.7.11
-    t:
-      1:
-      - 1
-      - 11
-      4: 3.7.11
-      5: 0.10.31
-      6: 4.14.1
-      8:
-      - 8
-batch_size:
-  desc: null
-  value: 4
-betas:
-  desc: null
-  value:
-  - 0.9
-  - 0.98
-checkbleu:
-  desc: null
-  value: 5
-checkpoint_dir:
-  desc: null
-  value: checkpoint
-clip:
-  desc: null
-  value: 1
-dfeedforward:
-  desc: null
-  value: 200
-dist_url:
-  desc: null
-  value: tcp://localhost:58472
-dmodel:
-  desc: null
-  value: 768
-dropout:
-  desc: null
-  value: 0.01
-epochs:
-  desc: null
-  value: 5
-eps:
-  desc: null
-  value: 1.0e-09
-learning_rate:
-  desc: null
-  value: 0.2
-load:
-  desc: null
-  value: 0
-loss_fn:
-  desc: null
-  value: cross_entropy
-mbert_out_size:
-  desc: null
-  value: 768
-momentum:
-  desc: null
-  value: 0.9
-ngpus_per_node:
-  desc: null
-  value: 2
-nhead:
-  desc: null
-  value: 4
-nlayers:
-  desc: null
-  value: 3
-optimizer:
-  desc: null
-  value: adam
-print_freq:
-  desc: null
-  value: 5
-projector:
-  desc: null
-  value: 768-256
-rank:
-  desc: null
-  value: 0
-test_translation:
-  desc: null
-  value: 0
-tokenizer:
-  desc: null
-  value: bert-base-multilingual-uncased
-train:
-  desc: null
-  value: true
-weight_decay:
-  desc: null
-  value: 1.0e-06
-workers:
-  desc: null
-  value: 4
-world_size:
-  desc: null
-  value: 2
diff --git a/wandb/run-20220415_193521-231emzap/files/diff.patch b/wandb/run-20220415_193521-231emzap/files/diff.patch
deleted file mode 100644
index b1ff87d..0000000
--- a/wandb/run-20220415_193521-231emzap/files/diff.patch
+++ /dev/null
@@ -1,30645 +0,0 @@
-diff --git a/__pycache__/barlow_utils.cpython-37.pyc b/__pycache__/barlow_utils.cpython-37.pyc
-index 3c0d4fe..b13b62f 100644
-Binary files a/__pycache__/barlow_utils.cpython-37.pyc and b/__pycache__/barlow_utils.cpython-37.pyc differ
-diff --git a/__pycache__/models.cpython-37.pyc b/__pycache__/models.cpython-37.pyc
-index 3bbb9de..acc1737 100644
-Binary files a/__pycache__/models.cpython-37.pyc and b/__pycache__/models.cpython-37.pyc differ
-diff --git a/__pycache__/t_dataset.cpython-37.pyc b/__pycache__/t_dataset.cpython-37.pyc
-index 2650733..c4b566b 100644
-Binary files a/__pycache__/t_dataset.cpython-37.pyc and b/__pycache__/t_dataset.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-37.pyc b/__pycache__/translation_utils.cpython-37.pyc
-index 60c9eda..12c22a5 100644
-Binary files a/__pycache__/translation_utils.cpython-37.pyc and b/__pycache__/translation_utils.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-38.pyc b/__pycache__/translation_utils.cpython-38.pyc
-index 061d0e7..a1e7877 100644
-Binary files a/__pycache__/translation_utils.cpython-38.pyc and b/__pycache__/translation_utils.cpython-38.pyc differ
-diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
-index 884dd9c..babc6a1 100644
---- a/checkpoint/stats.txt
-+++ b/checkpoint/stats.txt
-@@ -833,3 +833,61 @@ train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 -
- {"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
- {"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
- {"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 4}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 5}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 5}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 6}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 7}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 7}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 8}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 8}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 9}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 8}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 65}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 178}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 15}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 72}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 128}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 183}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 239}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 295}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 351}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 407}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 463}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 19}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 104}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 188}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 355}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 606}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 690}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.121065616607666, "time": 9}
-+{"epoch": 0, "step": 5, "loss": 97.44178771972656, "time": 10}
-+{"epoch": 0, "step": 10, "loss": 168.33328247070312, "time": 12}
-+{"epoch": 0, "step": 15, "loss": 133.17933654785156, "time": 12}
-+{"epoch": 0, "step": 20, "loss": 112.3768539428711, "time": 13}
-+{"epoch": 0, "step": 25, "loss": 120.29653930664062, "time": 14}
-+{"epoch": 0, "step": 30, "loss": 119.97941589355469, "time": 15}
-+{"epoch": 0, "step": 35, "loss": 86.40515899658203, "time": 16}
-+{"epoch": 0, "step": 40, "loss": 70.5906982421875, "time": 17}
-diff --git a/t_dataset.py b/t_dataset.py
-index c7ab181..53d5caa 100644
---- a/t_dataset.py
-+++ b/t_dataset.py
-@@ -20,19 +20,19 @@ class Translation_dataset_t(Dataset):
-             split = "train" 
-         else: 
-             split = "test" 
--        self.dataset = load_dataset('wmt14', "de-en", split=split) 
-+        self.dataset = load_dataset('opus_rf', "de-en", split=split) 
-         self.de_list = []
-         self.en_list = []
- #        self.tokenizer = tokenizer
-         self.tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-uncased')
--        dataset = load_dataset('opus_rf', 'de-en', split='train')
-         en_list_2 = []
--        for n, i in enumerate(dataset): 
-+        for n, i in enumerate(self.dataset): 
-             en_list_2.append(i['translation']['en'].lower())
- 
-         a1 = list(self.tokenizer(en_list_2, padding=True, return_tensors='pt')['input_ids'])
-         self.en_vocab, self.en_vocab_size = vocab(a1)
-         self.bert2id_dict = translation_utils.bert2id(self.en_vocab)
-+        self.id2bert_dict = translation_utils.id2bert(self.en_vocab)
-         
-         for i in self.dataset: 
-             self.de_list.append(self.tokenizer(i['translation']['de'].lower(), 
-diff --git a/train_translation.py b/train_translation.py
-index eea074a..c6ab0ef 100644
---- a/train_translation.py
-+++ b/train_translation.py
-@@ -33,6 +33,7 @@ import wandb
- #import barlow
- os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
- os.environ['WANDB_START_METHOD'] = 'thread'
-+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
- 
- MANUAL_SEED = 4444
- 
-@@ -75,9 +76,9 @@ parser.add_argument('--dmodel', default=768, type=int, metavar='T',
-                     help='dimension of transformer encoder')
- parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                     help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=500, type=int, metavar='F', 
-+parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                     help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=8, type=int, metavar= 'N', 
-+parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                    help='number of layers of transformer encoder') 
- parser.add_argument('--projector', default='768-256', type=str,
-                     metavar='MLP', help='projector MLP')
-@@ -233,6 +234,7 @@ def main_worker(gpu, args):
- 
-     assert args.batch_size % args.world_size == 0
-     per_device_batch_size = args.batch_size // args.world_size
-+    id2bert_dict = dataset.id2bert_dict
-     ###############################
-     loader = torch.utils.data.DataLoader(
-          dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-@@ -293,7 +295,7 @@ def main_worker(gpu, args):
-             if args.rank == 0: 
-                 if epoch%args.checkbleu ==0 : 
- 
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
-+                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                     wandb.log({'bleu_score': bleu_score}) 
-     #            print(bleu_score(predicted, target))
-     ##############################################################
-@@ -311,13 +313,13 @@ def main_worker(gpu, args):
-             
-     else: 
- 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
-+        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-         print('test_bleu_score', bleu_score)
-         if args.rank == 0: 
-             wandb.log({'bleu_score': bleu_score})
- 
- 
--def checkbleu(model, tokenizer, test_loader, gpu): 
-+def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
- 
-     model.eval()
-     predicted=[]
-@@ -325,13 +327,15 @@ def checkbleu(model, tokenizer, test_loader, gpu):
-             
-     for i in test_loader: 
-         src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
-+        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-         num_tokens = src.shape[0]
- 
-         src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
-+        out = translate(model, src, tokenizer, src_mask, id2bert, gpu)
-         predicted.append(out)
-         target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-+        print(out)
-+        print(tokenizer.convert_ids_to_tokens(tgt_out))
-                 
-         try: 
-             bleu_score(predicted, target)
-@@ -375,7 +379,7 @@ def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
- # actual function to translate input sentence into target language
- def translate(model: torch.nn.Module, 
-         src: torch.tensor, 
--        tokenizer,src_mask, gpu):
-+        tokenizer,src_mask, id2bert, gpu):
-     model.eval()
-     
-     num_tokens = src.shape[0]
-@@ -383,6 +387,11 @@ def translate(model: torch.nn.Module,
-     
-     tgt_tokens = greedy_decode(
-         model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-+    
-+#    for i in len(tgt_tokens): 
-+#        tgt_tokens[i] = id2bert[tgt_tokens[i]]
-+#    print(tgt_tokens)
-+
-     return tokenizer.convert_ids_to_tokens(tgt_tokens) 
- 
- 
-diff --git a/translation_dataset.py b/translation_dataset.py
-index 274c2f3..82270c6 100644
---- a/translation_dataset.py
-+++ b/translation_dataset.py
-@@ -11,7 +11,7 @@ class Translation_dataset(Dataset):
-     
-     def __init__(self):
-       
--        self.dataset = load_dataset('wmt14', "de-en", split="train") 
-+        self.dataset = load_dataset('opus_rf', "de-en", split="train") 
-         self.de_list = []
-         self.en_list = []
- 
-diff --git a/translation_utils.py b/translation_utils.py
-index 6c66f53..4b3b830 100644
---- a/translation_utils.py
-+++ b/translation_utils.py
-@@ -31,6 +31,13 @@ def bert2id(de_list: set):
-     
-     return label_dict
- 
-+def id2bert(de_list: set): 
-+    label_dict = {}
-+    for n, i in enumerate(de_list): 
-+        label_dict[n] = i
-+    
-+    return label_dict
-+
- def generate_square_subsequent_mask(sz):
-     mask = (torch.triu(torch.ones((sz, sz))) == 1).transpose(0, 1)
-     mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
-@@ -81,10 +88,10 @@ class TokenEmbedding(nn.Module):
-         super(TokenEmbedding, self).__init__()
-         # self.embedding = nn.Embedding(vocab_size, emb_size)
-         self.embedding = mbert
--#         for param in self.embedding.parameters():
--#             param.requires_grad = False
--#         for param in self.embedding.pooler.parameters():
--#             param.requires_grad = True
-+        for param in self.embedding.parameters():
-+            param.requires_grad = False
-+        for param in self.embedding.pooler.parameters():
-+            param.requires_grad = True
-         self.emb_size = emb_size
- 
-     def forward(self, tokens: torch.tensor):
-diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
-index 6163657..18bad28 120000
---- a/wandb/debug-internal.log
-+++ b/wandb/debug-internal.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug-internal.log
-\ No newline at end of file
-+run-20220415_193521-231emzap/logs/debug-internal.log
-\ No newline at end of file
-diff --git a/wandb/debug.log b/wandb/debug.log
-index 7d0f5dd..cb81c04 120000
---- a/wandb/debug.log
-+++ b/wandb/debug.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug.log
-\ No newline at end of file
-+run-20220415_193521-231emzap/logs/debug.log
-\ No newline at end of file
-diff --git a/wandb/latest-run b/wandb/latest-run
-index f11d588..c168413 120000
---- a/wandb/latest-run
-+++ b/wandb/latest-run
-@@ -1 +1 @@
--run-20220409_182749-paufev36
-\ No newline at end of file
-+run-20220415_193521-231emzap
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py b/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-deleted file mode 100644
-index 9236ace..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-+++ /dev/null
-@@ -1,350 +0,0 @@
--# Copyright (c) Facebook, Inc. and its affiliates.
--# All rights reserved.
--#
--# This source code is licensed under the license found in the
--# LICENSE file in the root directory of this source tree.
--
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--from translation_dataset import Translation_dataset
--from translation_dataset import MyCollate
--from transformers import BertModel
--from transformers import AutoTokenizer
--from torch import nn, optim
--import torch
--from t_dataset import Translation_dataset_t
--from torch.nn import Transformer
--from models import BarlowTwins
--from models import Translator
--from barlow_utils import off_diagonal 
--import wandb 
--#from _config import Config 
--#config = Config.config
--
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--#setting random seeds
--SEED = 4444
--
--random.seed(SEED)
--np.random.seed(SEED)
--torch.manual_seed(SEED)
--torch.cuda.manual_seed(SEED)
--torch.backends.cudnn.deterministic = True
--
--
--
--
--parser = argparse.ArgumentParser(description='Barlow Twins Training')
--# parser.add_batch_sizeargument('data', type=Path, metavar='DIR',
--#                     help='path to dataset')
--
--
--
--# Training parameters: 
--parser.add_argument('--workers', default=20, type=int, metavar='N',
--                    help='number of data loader workers')
--parser.add_argument('--epochs', default=2, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=64, type=int, metavar='N',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate-weights', default=0.2, type=float, metavar='LR',
--                    help='base learning rate for weights')
--parser.add_argument('--learning-rate-biases', default=0.0048, type=float, metavar='LR',
--                 help='base learning rate for biases and batch norm parameters')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--lambd', default=0.0051, type=float, metavar='L',
--                    help='weight on off-diagonal terms')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--
--# Model parameters:
--parser.add_argument('--projector', default='768-768', type=str,
--                    metavar='MLP', help='projector MLP')
--parser.add_argument('--print-freq', default=100, type=int, metavar='N',
--                    help='print frequency')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=3, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--dropout', default=0.0051, type=float, metavar= 'D', 
--                   help='dropout in transformer') 
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-cased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint-dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--parser.add_argument('--load', default=1, type=int,
--                    metavar='LO', help='load weights from translation model')
--
--args = parser.parse_args()
--
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main():
--
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--        wandb.init(config=args)#############################################
--        # wandb.config.update(args)
--        config = wandb.config
--        # print(args.lambd, config.lambd)
--        # wandb.finish()
--        # exibatch_sizet()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=False)
--    t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    mbert = BertModel.from_pretrained(args.tokenizer)
--    model = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=args.lambd).cuda(gpu)
--    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--    optimizer = LARS(parameters, lr=0, weight_decay=args.weight_decay,
--                     weight_decay_filter=True,
--                     lars_adaptation_filter=True)
--    # optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
--
--    # automatically resume from checkpoint if it exists
--    # if (args.checkpoint_dir / 'checkpoint.pth').is_file():
--    #     ckpt = torch.load(args.checkpoint_dir / 'checkpoint.pth',
--    #                       map_location='cpu')
--    #     start_epoch = ckpt['epoch']
--    #     # print("model=",model)
--    #     # print("ckpt=",ckpt['model'])
--    #     model.load_state_dict(ckpt['model'])
--    #     optimizer.load_state_dict(ckpt['optimizer'])
--    # else:
--
--    trans_dataset = Translation_dataset_t(train=True)
--    src_vocab_size = trans_dataset.de_vocab_size 
--    tgt_vocab_size = trans_dataset.en_vocab_size
--    tokenizer = trans_dataset.tokenizer
--    transformer = Transformer(d_model=args.dmodel, 
--                                   nhead=args.nhead, 
--                                   num_encoder_layers=args.nlayers,
--                                   num_decoder_layers=args.nlayers, 
--                                   dim_feedforward=args.dfeedforward, 
--                                   dropout=args.dropout)
--    print(args.batch_size)
--    translation_model = Translator(mbert, 
--            transformer,
--            tgt_vocab_size=tgt_vocab_size,
--            emb_size=args.mbert_out_size)
--    
--    if args.load == 1 : 
--        print('loading translation model')
--        ckpt = torch.load(args.checkpoint_dir / 'translation_checkpoint.pth') #,map_location='cpu')
--        translation_model.load_state_dict(ckpt['model'])
--        model.transformer_enc = translation_model.transformer.encoder
--        model.mbert = translation_model.tok_emb.embedding
--        
--    start_epoch = 0
--
--
--    ################################
--    # dataset = torchvision.datasets.ImageFolder(args.data / 'train', Transform())
--    # sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--    ###############################
--
--    dataset = Translation_dataset()
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    #############################
--    start_time = time.time()
--    scaler = torch.cuda.amp.GradScaler()
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            y1 = sent[0].cuda(gpu, non_blocking=True)
--            y2 = sent[1].cuda(gpu, non_blocking=True)
--            adjust_learning_rate(args, optimizer, loader, step)
--            optimizer.zero_grad()
--            with torch.cuda.amp.autocast(): 
--                _, loss = model.forward(y1, y2)
--                wandb.log({'iter_loss':loss})
--#               print(loss.item())
--                epoch_loss += loss.item()
--            scaler.scale(loss).backward()
--            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
--            scaler.step(optimizer)
--            scaler.update()
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 lr_weights=optimizer.param_groups[0]['lr'],
--                                 lr_biases=optimizer.param_groups[1]['lr'],
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.state_dict(),
--                         optimizer=optimizer.state_dict())
--            torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--            print('barlow model saved in', args.checkpoint_dir)
--            for sent in test_loader: 
--                y1 = sent[0].cuda(gpu, non_blocking=True)
--                y2 = sent[1].cuda(gpu, non_blocking=True)
--                model.eval()
--                c, _ = model(y1, y2)
--                xlabels = tokenizer.convert_ids_to_tokens(y2)
--                ylabels = tokenizer.convert_ids_to_tokens(y1)
--    wandb.finish()
--#    if args.rank == 0:
--#        save final model
--#        torch.save(model.module.state_dict(),
--#                    args.checkpoint_dir / 'translation.pth')
--
--
--def adjust_learning_rate(args, optimizer, loader, step):
--    max_steps = args.epochs * len(loader)
--    warmup_steps = 10 * len(loader)
--    base_lr = args.batch_size / 256
--    if step < warmup_steps:
--        lr = base_lr * step / warmup_steps
--    else:
--        step -= warmup_steps
--        max_steps -= warmup_steps
--        q = 0.5 * (1 + math.cos(math.pi * step / max_steps))
--        end_lr = base_lr * 0.001
--        lr = base_lr * q + end_lr * (1 - q)
--    optimizer.param_groups[0]['lr'] = lr * args.learning_rate_weights
--    optimizer.param_groups[1]['lr'] = lr * args.learning_rate_biases
--
--
--def handle_sigusr1(signum, frame):
--    os.system(f'scontrol requeue {os.getenv("SLURM_JOB_ID")}')
--    exit()
--
--
--def handle_sigterm(signum, frame):
--    pass
--
--
--class LARS(optim.Optimizer):
--    def __init__(self, params, lr, weight_decay=0, momentum=0.9, eta=0.001,
--                 weight_decay_filter=False, lars_adaptation_filter=False):
--        defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum,
--                        eta=eta, weight_decay_filter=weight_decay_filter,
--                        lars_adaptation_filter=lars_adaptation_filter)
--        super().__init__(params, defaults)
--
--
--    def exclude_bias_and_norm(self, p):
--        return p.ndim == 1
--
--    @torch.no_grad()
--    def step(self):
--        for g in self.param_groups:
--            for p in g['params']:
--                dp = p.grad
--
--                if dp is None:
--                    continue
--
--                if not g['weight_decay_filter'] or not self.exclude_bias_and_norm(p):
--                    dp = dp.add(p, alpha=g['weight_decay'])
--
--                if not g['lars_adaptation_filter'] or not self.exclude_bias_and_norm(p):
--                    param_norm = torch.norm(p)
--                    update_norm = torch.norm(dp)
--                    one = torch.ones_like(param_norm)
--                    q = torch.where(param_norm > 0.,
--                                    torch.where(update_norm > 0,
--                                                (g['eta'] * param_norm / update_norm), one), one)
--                    dp = dp.mul(q)
--
--                param_state = self.state[p]
--                if 'mu' not in param_state:
--                    param_state['mu'] = torch.zeros_like(p)
--                mu = param_state['mu']
--                mu.mul_(g['momentum']).add_(dp)
--
--                p.add_(mu, alpha=-g['lr'])
--
--
--if __name__ == '__main__':
--    try:  
--      main()
--    except KeyboardInterrupt:
--      print('Interrupted')
--      wandb.finish()
--      try:
--          sys.exit(0)
--      except SystemExit:
--          os._exit(0)
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml b/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/config.yaml b/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-deleted file mode 100644
-index 147470d..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-+++ /dev/null
-@@ -1,90 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/barlow.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.0051
--epochs:
--  desc: null
--  value: 2
--lambd:
--  desc: null
--  value: 0.0051
--learning_rate_biases:
--  desc: null
--  value: 0.0048
--learning_rate_weights:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 3
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 100
--projector:
--  desc: null
--  value: 768-768
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-cased
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 20
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/output.log b/wandb/run-20220406_171518-s7zesus8/files/output.log
-deleted file mode 100644
-index 847ffbb..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/output.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--
--barlow.py --load 0
--Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Error in sys.excepthook:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 47, in getlines
--    return updatecache(filename, module_globals)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 136, in updatecache
--    with tokenize.open(fullname) as fp:
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/tokenize.py", line 447, in open
--    buffer = _builtin_open(filename, 'rb')
--KeyboardInterrupt
--Original exception was:
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt b/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-deleted file mode 100644
-index 5f93d29..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,21 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-06T11:45:20.215162",
--    "startedAt": "2022-04-06T11:45:18.613420",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_enhancement/barlow.py",
--    "codePath": "barlow.py",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log b/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-deleted file mode 100644
-index 0630656..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-+++ /dev/null
-@@ -1,91 +0,0 @@
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,622 DEBUG   MainThread:16786 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: check_version
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send():179] send: header
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: check_version
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:19,155 DEBUG   SenderThread:16786 [sender.py:send():179] send: run
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 DEBUG   SenderThread:16786 [sender.py:send():179] send: summary
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:20,211 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: run_start
--2022-04-06 17:15:20,214 DEBUG   HandlerThread:16786 [meta.py:__init__():39] meta init
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:__init__():53] meta init done
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:probe():210] probe
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():89] save code
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():110] save code done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():57] save pip
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():71] save pip done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_conda():78] save conda
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,240 DEBUG   HandlerThread:16786 [meta.py:_save_conda():86] save conda done
--2022-04-06 17:15:22,241 DEBUG   HandlerThread:16786 [meta.py:probe():252] probe done
--2022-04-06 17:15:22,255 DEBUG   SenderThread:16786 [sender.py:send():179] send: files
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-06 17:15:22,262 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: stop_status
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug.log b/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-deleted file mode 100644
-index 9769176..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:init():369] calling init triggers
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 20, 'epochs': 2, 'batch_size': 64, 'learning_rate_weights': 0.2, 'learning_rate_biases': 0.0048, 'weight_decay': 1e-06, 'lambd': 0.0051, 'clip': 1, 'projector': '768-768', 'print_freq': 100, 'dmodel': 768, 'nhead': 3, 'dfeedforward': 256, 'nlayers': 3, 'dropout': 0.0051, 'tokenizer': 'bert-base-multilingual-cased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():418] starting backend
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():132] starting backend process...
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb b/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
-deleted file mode 100644
-index cd7ebea..0000000
-Binary files a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb and /dev/null differ
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py b/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-deleted file mode 100644
-index f15df21..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch b/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-deleted file mode 100644
-index 0ddeae0..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-+++ /dev/null
-@@ -1,226 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2158287 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,87 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..ee4c0ff 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..29be718 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..bda663d 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/output.log b/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-deleted file mode 100644
-index 4d74c7d..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt b/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-deleted file mode 100644
-index 9eb0f02..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:28:48.101605",
--    "startedAt": "2022-04-08T09:28:45.736549",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-deleted file mode 100644
-index 5708b15..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.139744758605957, "_runtime": 22, "_timestamp": 1649410147, "_step": 1, "epoch_loss": 7.139744758605957}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-deleted file mode 100644
-index e57e276..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,745 DEBUG   MainThread:63630 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send():179] send: header
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:46,531 DEBUG   SenderThread:63630 [sender.py:send():179] send: run
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:48,099 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():39] meta init
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():53] meta init done
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:probe():210] probe
--2022-04-08 14:58:48,107 DEBUG   HandlerThread:63630 [meta.py:_setup_git():200] setup git
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_save_code():89] save code
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_code():110] save code done
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_patches():127] save patches
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():57] save pip
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_conda():78] save conda
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:probe():252] probe done
--2022-04-08 14:58:49,727 DEBUG   SenderThread:63630 [sender.py:send():179] send: files
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,737 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:50,547 DEBUG   SenderThread:63630 [sender.py:send():179] send: config
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:05,549 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:05,549 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-deleted file mode 100644
-index a6875c4..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'd3rkwo1k', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml', 'start_method': 'thread'}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:45,738 INFO    MainThread:63630 [wandb_init.py:init():418] starting backend
--2022-04-08 14:58:45,743 INFO    MainThread:63630 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb b/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py b/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml b/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/config.yaml b/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-deleted file mode 100644
-index d5b49b7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 36
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/diff.patch b/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-deleted file mode 100644
-index 5bddede..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-+++ /dev/null
-@@ -1,228 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..f7a973d 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,89 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..151b958 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..80b3468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..abf5aa3 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145917-fjhaj183
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/output.log b/wandb/run-20220408_145917-fjhaj183/files/output.log
-deleted file mode 100644
-index ceeeb4b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt b/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-deleted file mode 100644
-index 705a1e7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:18.659644",
--    "startedAt": "2022-04-08T09:29:17.328450",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=36",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-deleted file mode 100644
-index 1749cae..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140841484069824, "_runtime": 16, "_timestamp": 1649410173, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log b/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-deleted file mode 100644
-index 6a2ea0b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,338 DEBUG   MainThread:63880 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send():179] send: header
--2022-04-08 14:59:17,342 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:17,943 DEBUG   SenderThread:63880 [sender.py:send():179] send: run
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:18,657 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():39] meta init
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:probe():210] probe
--2022-04-08 14:59:18,665 DEBUG   HandlerThread:63880 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_save_code():89] save code
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:probe():252] probe done
--2022-04-08 14:59:20,075 DEBUG   SenderThread:63880 [sender.py:send():179] send: files
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,086 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:20,978 DEBUG   SenderThread:63880 [sender.py:send():179] send: config
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: history
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug.log b/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-deleted file mode 100644
-index 5f71fa1..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjhaj183', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjhaj183.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 36, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:17,335 INFO    MainThread:63880 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb b/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py b/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml b/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml b/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-deleted file mode 100644
-index 39ea9ed..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 16
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch b/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-deleted file mode 100644
-index 3de404c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-+++ /dev/null
-@@ -1,230 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..1036f20 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,91 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..33a9122 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..622b540 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c775116 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/output.log b/wandb/run-20220408_145943-fjlzyv53/files/output.log
-deleted file mode 100644
-index 0a584f7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt b/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-deleted file mode 100644
-index 321b5fe..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:44.714511",
--    "startedAt": "2022-04-08T09:29:43.530748",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=16",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-deleted file mode 100644
-index 43fa534..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.180241584777832, "_runtime": 16, "_timestamp": 1649410199, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-deleted file mode 100644
-index 1bb5ef6..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,540 DEBUG   MainThread:64131 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send():179] send: header
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:43,999 DEBUG   SenderThread:64131 [sender.py:send():179] send: run
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:44,712 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():39] meta init
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:probe():210] probe
--2022-04-08 14:59:44,720 DEBUG   HandlerThread:64131 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:44,739 DEBUG   HandlerThread:64131 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:44,740 DEBUG   HandlerThread:64131 [meta.py:_save_code():89] save code
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:probe():252] probe done
--2022-04-08 14:59:46,122 DEBUG   SenderThread:64131 [sender.py:send():179] send: files
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,133 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,710 DEBUG   SenderThread:64131 [sender.py:send():179] send: config
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: history
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-deleted file mode 100644
-index 042323c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjlzyv53', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 1024, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:43,537 INFO    MainThread:64131 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb b/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py b/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml b/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150006-abict4v2/files/config.yaml b/wandb/run-20220408_150006-abict4v2/files/config.yaml
-deleted file mode 100644
-index 55505a9..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 20
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 8
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150006-abict4v2/files/diff.patch b/wandb/run-20220408_150006-abict4v2/files/diff.patch
-deleted file mode 100644
-index cae01c4..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/diff.patch
-+++ /dev/null
-@@ -1,232 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..a79a795 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,93 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..baa82b6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..79d1f8d 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..4572147 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150006-abict4v2
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/output.log b/wandb/run-20220408_150006-abict4v2/files/output.log
-deleted file mode 100644
-index 18438a2..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/output.log
-+++ /dev/null
-@@ -1,14 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:261: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
-diff --git a/wandb/run-20220408_150006-abict4v2/files/requirements.txt b/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json b/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-deleted file mode 100644
-index f46fef8..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:08.569102",
--    "startedAt": "2022-04-08T09:30:06.988517",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=20",
--        "--nhead=8",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json b/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-deleted file mode 100644
-index 4c47552..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.120020389556885, "_runtime": 21, "_timestamp": 1649410227, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log b/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-deleted file mode 100644
-index eb4114e..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-+++ /dev/null
-@@ -1,71 +0,0 @@
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,998 DEBUG   MainThread:64393 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send():179] send: header
--2022-04-08 15:00:07,002 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:07,447 DEBUG   SenderThread:64393 [sender.py:send():179] send: run
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,565 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:08,566 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:08,568 DEBUG   HandlerThread:64393 [meta.py:__init__():39] meta init
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:probe():210] probe
--2022-04-08 15:00:08,574 DEBUG   HandlerThread:64393 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_save_code():89] save code
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:probe():252] probe done
--2022-04-08 15:00:10,005 DEBUG   SenderThread:64393 [sender.py:send():179] send: files
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:11,189 DEBUG   SenderThread:64393 [sender.py:send():179] send: config
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:26,191 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:26,191 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: history
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug.log b/wandb/run-20220408_150006-abict4v2/logs/debug.log
-deleted file mode 100644
-index 2782e5f..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug.log
-+++ /dev/null
-@@ -1,51 +0,0 @@
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'abict4v2', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-abict4v2.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 20, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 8, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:06,990 INFO    MainThread:64393 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:06,995 INFO    MainThread:64393 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb b/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py b/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml b/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml b/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-deleted file mode 100644
-index ea14f0e..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch b/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-deleted file mode 100644
-index 47b804f..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-+++ /dev/null
-@@ -1,234 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2248477 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,95 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..165ed2c 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..f1325dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..1413293 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/output.log b/wandb/run-20220408_150037-ba0yl54z/files/output.log
-deleted file mode 100644
-index 6742216..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt b/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-deleted file mode 100644
-index 5a492ae..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:38.254663",
--    "startedAt": "2022-04-08T09:30:37.394479",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=64",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-deleted file mode 100644
-index 662ac89..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.082856178283691, "_runtime": 16, "_timestamp": 1649410253, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-deleted file mode 100644
-index 0c041a1..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,404 DEBUG   MainThread:64646 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 DEBUG   SenderThread:64646 [sender.py:send():179] send: header
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,410 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:37,410 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:37,611 DEBUG   SenderThread:64646 [sender.py:send():179] send: run
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:38,252 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():39] meta init
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:probe():210] probe
--2022-04-08 15:00:38,260 DEBUG   HandlerThread:64646 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_save_code():89] save code
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:probe():252] probe done
--2022-04-08 15:00:39,665 DEBUG   SenderThread:64646 [sender.py:send():179] send: files
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,676 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:39,676 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:40,430 DEBUG   SenderThread:64646 [sender.py:send():179] send: config
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: history
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-deleted file mode 100644
-index 4346748..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'ba0yl54z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 64, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 512, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:37,401 INFO    MainThread:64646 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb b/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py b/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml b/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml b/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-deleted file mode 100644
-index 546bdaa..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 16
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch b/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-deleted file mode 100644
-index c98ba4e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-+++ /dev/null
-@@ -1,285 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ea51a40 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,97 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f8e98b2 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..9304e2b 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b02872b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/output.log b/wandb/run-20220408_153004-dg43ixc4/files/output.log
-deleted file mode 100644
-index f49019d..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt b/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-deleted file mode 100644
-index 109e1b6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:05.796412",
--    "startedAt": "2022-04-08T10:00:04.837672",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=512",
--        "--epochs=16",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-deleted file mode 100644
-index 09cdda6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140233993530273, "_runtime": 15, "_timestamp": 1649412019, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-deleted file mode 100644
-index 9669aaf..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,848 DEBUG   MainThread:65348 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,851 DEBUG   SenderThread:65348 [sender.py:send():179] send: header
--2022-04-08 15:30:04,851 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:04,852 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,022 DEBUG   SenderThread:65348 [sender.py:send():179] send: run
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:05,794 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():39] meta init
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:probe():210] probe
--2022-04-08 15:30:05,802 DEBUG   HandlerThread:65348 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:05,821 DEBUG   HandlerThread:65348 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:05,822 DEBUG   HandlerThread:65348 [meta.py:_save_code():89] save code
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:probe():252] probe done
--2022-04-08 15:30:07,221 DEBUG   SenderThread:65348 [sender.py:send():179] send: files
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,232 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:07,233 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,677 DEBUG   SenderThread:65348 [sender.py:send():179] send: config
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: history
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-deleted file mode 100644
-index 66c14b1..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'dg43ixc4', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 16, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:04,845 INFO    MainThread:65348 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb b/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py b/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml b/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml b/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-deleted file mode 100644
-index 122f33a..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch b/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-deleted file mode 100644
-index 797f0a1..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-+++ /dev/null
-@@ -1,287 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..356076f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,99 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7b452fc 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..48b2ecd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..93be230 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/output.log b/wandb/run-20220408_153027-fwwd5rya/files/output.log
-deleted file mode 100644
-index e86aeca..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-17:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt b/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-deleted file mode 100644
-index dcac75d..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:27.794832",
--    "startedAt": "2022-04-08T10:00:27.031889",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=256",
--        "--epochs=40",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-deleted file mode 100644
-index e70a2b8..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-+++ /dev/null
-@@ -1,99 +0,0 @@
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,040 DEBUG   MainThread:65601 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,046 DEBUG   SenderThread:65601 [sender.py:send():179] send: header
--2022-04-08 15:30:27,046 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:27,047 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,254 DEBUG   SenderThread:65601 [sender.py:send():179] send: run
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: summary
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:27,792 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():39] meta init
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:probe():210] probe
--2022-04-08 15:30:27,800 DEBUG   HandlerThread:65601 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:27,819 DEBUG   HandlerThread:65601 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:27,820 DEBUG   HandlerThread:65601 [meta.py:_save_code():89] save code
--2022-04-08 15:30:27,828 DEBUG   HandlerThread:65601 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:27,829 DEBUG   HandlerThread:65601 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:probe():252] probe done
--2022-04-08 15:30:29,202 DEBUG   SenderThread:65601 [sender.py:send():179] send: files
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:29,214 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: config
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-deleted file mode 100644
-index 987c5d6..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-+++ /dev/null
-@@ -1,84 +0,0 @@
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fwwd5rya', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 256, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:27,038 INFO    MainThread:65601 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:27,039 INFO    MainThread:65601 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb b/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
-deleted file mode 100644
-index bfb12ff..0000000
-Binary files a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py b/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml b/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml b/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch b/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-deleted file mode 100644
-index bd71761..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-+++ /dev/null
-@@ -1,377 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..d3a775c 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,100 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..74ec524 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..c957937 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..287708f 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/output.log b/wandb/run-20220409_152616-3a3gw94y/files/output.log
-deleted file mode 100644
-index 13e9c3e..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt b/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-deleted file mode 100644
-index 20f0482..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:56:17.429229",
--    "startedAt": "2022-04-09T09:56:16.815816",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-deleted file mode 100644
-index 5602f92..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 16, "_timestamp": 1649498192, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-deleted file mode 100644
-index 2546fd3..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,824 DEBUG   MainThread:3266 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,828 DEBUG   SenderThread:3266 [sender.py:send():179] send: header
--2022-04-09 15:26:16,829 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:16,984 DEBUG   SenderThread:3266 [sender.py:send():179] send: run
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:17,426 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():39] meta init
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():53] meta init done
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:probe():210] probe
--2022-04-09 15:26:17,435 DEBUG   HandlerThread:3266 [meta.py:_setup_git():200] setup git
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_save_code():89] save code
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_code():110] save code done
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_patches():127] save patches
--2022-04-09 15:26:17,564 DEBUG   HandlerThread:3266 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:26:17,565 DEBUG   HandlerThread:3266 [meta.py:_save_pip():57] save pip
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_conda():78] save conda
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:probe():252] probe done
--2022-04-09 15:26:19,491 DEBUG   SenderThread:3266 [sender.py:send():179] send: files
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:26:19,497 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:19,831 DEBUG   SenderThread:3266 [sender.py:send():179] send: config
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: history
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-deleted file mode 100644
-index ebbf034..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():418] starting backend
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb b/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py b/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml b/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml b/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch b/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-deleted file mode 100644
-index c3ed101..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-+++ /dev/null
-@@ -1,379 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ed88fe4 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,102 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..4895794 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..1f9d48c 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..dfe2dcb 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/output.log b/wandb/run-20220409_152708-15jgzcwp/files/output.log
-deleted file mode 100644
-index 9a9a49f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt b/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-deleted file mode 100644
-index abaad7d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:57:09.613679",
--    "startedAt": "2022-04-09T09:57:08.966939",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-deleted file mode 100644
-index 0164a0d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 12, "_timestamp": 1649498241, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-deleted file mode 100644
-index de7918e..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,002 DEBUG   MainThread:3540 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,017 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send():179] send: header
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,109 DEBUG   SenderThread:3540 [sender.py:send():179] send: run
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:09,611 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():39] meta init
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():53] meta init done
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:probe():210] probe
--2022-04-09 15:27:09,619 DEBUG   HandlerThread:3540 [meta.py:_setup_git():200] setup git
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_save_code():89] save code
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_code():110] save code done
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_patches():127] save patches
--2022-04-09 15:27:09,693 DEBUG   HandlerThread:3540 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():57] save pip
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_conda():78] save conda
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,002 DEBUG   HandlerThread:3540 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:27:11,003 DEBUG   HandlerThread:3540 [meta.py:probe():252] probe done
--2022-04-09 15:27:11,004 DEBUG   SenderThread:3540 [sender.py:send():179] send: files
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,362 DEBUG   SenderThread:3540 [sender.py:send():179] send: config
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: history
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-deleted file mode 100644
-index 023162f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:27:08,971 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:08,974 INFO    MainThread:3540 [wandb_init.py:init():418] starting backend
--2022-04-09 15:27:08,994 INFO    MainThread:3540 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:27:08,996 INFO    MainThread:3540 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb b/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py b/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-deleted file mode 100644
-index 596bd8d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch b/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-deleted file mode 100644
-index edba74d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-+++ /dev/null
-@@ -1,457 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..6f7f3e6 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,180 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..596bd8d 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7064436 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..3ee4416 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..425ec98 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/output.log b/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-deleted file mode 100644
-index e872735..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt b/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-deleted file mode 100644
-index 39bdbe7..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:31:16.739157",
--    "startedAt": "2022-04-09T10:31:15.626079",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-deleted file mode 100644
-index 96a4906..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 14, "_timestamp": 1649500289, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-deleted file mode 100644
-index 2dc7db1..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,660 DEBUG   MainThread:6109 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 DEBUG   SenderThread:6109 [sender.py:send():179] send: header
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,673 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:01:15,673 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:15,970 DEBUG   SenderThread:6109 [sender.py:send():179] send: run
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:16,736 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():39] meta init
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():53] meta init done
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:probe():210] probe
--2022-04-09 16:01:16,745 DEBUG   HandlerThread:6109 [meta.py:_setup_git():200] setup git
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_save_code():89] save code
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_code():110] save code done
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_patches():127] save patches
--2022-04-09 16:01:16,811 DEBUG   HandlerThread:6109 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():57] save pip
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_conda():78] save conda
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:probe():252] probe done
--2022-04-09 16:01:18,150 DEBUG   SenderThread:6109 [sender.py:send():179] send: files
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,158 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:01:18,158 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,709 DEBUG   SenderThread:6109 [sender.py:send():179] send: config
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: history
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-deleted file mode 100644
-index 87f5666..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--2022-04-09 16:01:15,633 INFO    MainThread:6109 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():418] starting backend
--2022-04-09 16:01:15,655 INFO    MainThread:6109 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:01:15,656 INFO    MainThread:6109 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb b/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py b/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-deleted file mode 100644
-index feaf1fc..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch b/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-deleted file mode 100644
-index eec0ab3..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-+++ /dev/null
-@@ -1,459 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..8b42533 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,182 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..feaf1fc 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..e712296 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b2fc627 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..337b531 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/output.log b/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-deleted file mode 100644
-index e15e9a4..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-+++ /dev/null
-@@ -1,17 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt b/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-deleted file mode 100644
-index f4efc7b..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:32:47.190940",
--    "startedAt": "2022-04-09T10:32:46.030719",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-deleted file mode 100644
-index 59ceedf..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 18, "_timestamp": 1649500384, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-deleted file mode 100644
-index 4dae842..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,040 DEBUG   MainThread:6410 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send():179] send: header
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:46,151 DEBUG   SenderThread:6410 [sender.py:send():179] send: run
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:47,188 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():39] meta init
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():53] meta init done
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:probe():210] probe
--2022-04-09 16:02:47,197 DEBUG   HandlerThread:6410 [meta.py:_setup_git():200] setup git
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_save_code():89] save code
--2022-04-09 16:02:47,224 DEBUG   HandlerThread:6410 [meta.py:_save_code():110] save code done
--2022-04-09 16:02:47,225 DEBUG   HandlerThread:6410 [meta.py:_save_patches():127] save patches
--2022-04-09 16:02:47,270 DEBUG   HandlerThread:6410 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():57] save pip
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_conda():78] save conda
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:probe():252] probe done
--2022-04-09 16:02:48,639 DEBUG   SenderThread:6410 [sender.py:send():179] send: files
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,649 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:49,267 DEBUG   SenderThread:6410 [sender.py:send():179] send: config
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,268 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:03:04,269 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:03:04,791 DEBUG   SenderThread:6410 [sender.py:send():179] send: history
--2022-04-09 16:03:04,792 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-deleted file mode 100644
-index c4edd31..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():418] starting backend
--2022-04-09 16:02:46,037 INFO    MainThread:6410 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb b/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py b/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-deleted file mode 100644
-index 182fd97..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-+++ /dev/null
-@@ -1,378 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch b/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-deleted file mode 100644
-index 2c51f6a..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-+++ /dev/null
-@@ -1,470 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..507a499 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,192 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..182fd97 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,98 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..2224b92 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..94d02b9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f7361e5 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/output.log b/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-deleted file mode 100644
-index 35bceac..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-+++ /dev/null
-@@ -1,18 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt b/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-deleted file mode 100644
-index 440569b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:34:10.122598",
--    "startedAt": "2022-04-09T10:34:09.149412",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-deleted file mode 100644
-index 52da06b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 27, "_timestamp": 1649500476, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-deleted file mode 100644
-index bf89eff..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,159 DEBUG   MainThread:6703 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send():179] send: header
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:09,250 DEBUG   SenderThread:6703 [sender.py:send():179] send: run
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:10,119 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():39] meta init
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():53] meta init done
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:probe():210] probe
--2022-04-09 16:04:10,130 DEBUG   HandlerThread:6703 [meta.py:_setup_git():200] setup git
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_save_code():89] save code
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_code():110] save code done
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_patches():127] save patches
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_pip():57] save pip
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_conda():78] save conda
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:probe():252] probe done
--2022-04-09 16:04:11,658 DEBUG   SenderThread:6703 [sender.py:send():179] send: files
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,667 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:11,669 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:12,396 DEBUG   SenderThread:6703 [sender.py:send():179] send: config
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:27,397 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:27,397 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: history
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:39,168 DEBUG   SenderThread:6703 [sender.py:send():179] send: stats
--2022-04-09 16:04:44,241 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:44,241 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:59,736 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:59,737 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-deleted file mode 100644
-index 0fbab81..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-+++ /dev/null
-@@ -1,54 +0,0 @@
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():418] starting backend
--2022-04-09 16:04:09,156 INFO    MainThread:6703 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:04:09,157 INFO    MainThread:6703 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb b/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
-deleted file mode 100644
-index 81c67b9..0000000
-Binary files a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py b/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml b/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/config.yaml b/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-deleted file mode 100644
-index 1ebd7db..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/diff.patch b/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-deleted file mode 100644
-index 9c4e2ae..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-+++ /dev/null
-@@ -1,482 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2d0dffc 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,202 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..18dd535 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b8703a2 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7af087b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160908-2097uoqw
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/output.log b/wandb/run-20220409_160908-2097uoqw/files/output.log
-deleted file mode 100644
-index ed7c7b5..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt b/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-deleted file mode 100644
-index 3cf53b0..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:39:09.049034",
--    "startedAt": "2022-04-09T10:39:08.174640",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-deleted file mode 100644
-index 225791e..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5264.9873046875, "_runtime": 162, "_timestamp": 1649500910, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log b/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-deleted file mode 100644
-index 1baf812..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-+++ /dev/null
-@@ -1,1238 +0,0 @@
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,183 DEBUG   MainThread:7244 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 DEBUG   SenderThread:7244 [sender.py:send():179] send: header
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,187 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:09:08,187 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:08,556 DEBUG   SenderThread:7244 [sender.py:send():179] send: run
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:09,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():39] meta init
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():53] meta init done
--2022-04-09 16:09:09,049 DEBUG   HandlerThread:7244 [meta.py:probe():210] probe
--2022-04-09 16:09:09,055 DEBUG   HandlerThread:7244 [meta.py:_setup_git():200] setup git
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_save_code():89] save code
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_code():110] save code done
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_patches():127] save patches
--2022-04-09 16:09:09,148 DEBUG   HandlerThread:7244 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:09:09,149 DEBUG   HandlerThread:7244 [meta.py:_save_pip():57] save pip
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_conda():78] save conda
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:probe():252] probe done
--2022-04-09 16:09:10,559 DEBUG   SenderThread:7244 [sender.py:send():179] send: files
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,033 DEBUG   SenderThread:7244 [sender.py:send():179] send: config
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:26,037 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:26,037 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:37,780 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:41,491 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:41,492 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:08,466 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:10:12,367 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:12,368 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:27,818 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:27,818 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:43,478 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:43,478 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,373 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:05,374 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:08,654 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:14,750 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:14,750 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:32,169 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:32,169 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:39,457 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:48,462 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:48,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:03,967 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:12:03,968 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
--2022-04-09 16:12:05,938 INFO    MainThread:7244 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 16:12:05,939 INFO    MainThread:7244 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:12:06,150 DEBUG   SenderThread:7244 [sender.py:send():179] send: telemetry
--2022-04-09 16:12:06,151 DEBUG   SenderThread:7244 [sender.py:send():179] send: exit
--2022-04-09 16:12:06,151 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():295] send defer
--2022-04-09 16:12:06,153 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:06,155 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,155 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 16:12:06,155 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:06,156 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 16:12:06,158 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,158 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 16:12:06,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:12:06,227 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,227 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 16:12:06,228 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,228 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 16:12:06,229 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,229 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 16:12:06,229 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,229 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 16:12:06,259 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,450 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:06,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:07,230 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 16:12:07,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,231 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,231 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 16:12:07,231 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:07,232 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:12:07,333 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:07,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:07,453 INFO    SenderThread:7244 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt requirements.txt
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:12:07,455 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log output.log
--2022-04-09 16:12:07,456 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:12:07,457 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:12:07,467 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml config.yaml
--2022-04-09 16:12:07,468 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch diff.patch
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 16:12:07,508 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,510 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,510 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 16:12:07,510 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:07,511 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 16:12:07,512 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,512 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 16:12:07,512 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,513 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 16:12:07,612 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,484 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 16:12:08,485 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,486 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,486 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 16:12:08,487 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 16:12:08,487 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41552
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,489 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,489 DEBUG   SenderThread:7244 [sender.py:send():179] send: final
--2022-04-09 16:12:08,490 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send():179] send: footer
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,490 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 16:12:08,591 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,591 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,593 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,695 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,695 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,696 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,798 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,798 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,799 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,848 INFO    Thread-33 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:08,900 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,901 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,902 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,004 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,005 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,006 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,108 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,109 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,110 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,212 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,213 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,214 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,316 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,317 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,318 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,420 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,421 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,422 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,524 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,525 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,526 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,628 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,629 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,630 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,732 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,733 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,734 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,837 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,838 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,840 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,875 INFO    Thread-32 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:09,942 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,942 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,944 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,046 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,047 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,149 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,150 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,151 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,253 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,254 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,255 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,304 INFO    Thread-29 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:12:10,357 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,358 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,359 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,461 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,463 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,772 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,772 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,772 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,874 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,874 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,876 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,978 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,979 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,980 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,082 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,082 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,084 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,186 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,186 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,188 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,290 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,290 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,292 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,314 INFO    Thread-30 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:11,394 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,394 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,396 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,498 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,499 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,500 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,602 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,603 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,604 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,706 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,707 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,708 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,810 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,810 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,812 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,914 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,915 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,916 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,018 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,019 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,020 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,122 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,122 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,124 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,226 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,228 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,330 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,330 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,332 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,434 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,435 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,436 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,538 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,538 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,540 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,642 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,642 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,644 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,746 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,746 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,747 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,850 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,850 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,852 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,954 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,954 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,955 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,057 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,058 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,059 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,161 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,162 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,163 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,265 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,266 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,267 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,369 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,370 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,371 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,473 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,473 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,475 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,577 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,577 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,578 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,680 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,681 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,682 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,784 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,785 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,786 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,888 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,889 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,890 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,992 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,993 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,994 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,096 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,097 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,098 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,200 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,201 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,202 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,304 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,305 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,307 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,409 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,410 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,411 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,513 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,514 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,515 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,617 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,618 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,619 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,721 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,721 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,723 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,826 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,827 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,829 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,931 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,931 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,933 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,034 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,035 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,037 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,138 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,139 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,141 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,244 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,244 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,245 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,348 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,348 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,350 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,453 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,454 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,461 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,773 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,773 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,775 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,877 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,877 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,879 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,981 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,982 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,983 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,085 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,086 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,087 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,189 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,190 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,191 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,293 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,294 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,295 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,397 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,398 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,399 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,501 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,502 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,503 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,605 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,606 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,607 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,709 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,710 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,711 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,813 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,814 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,816 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,918 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,919 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,920 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,022 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,023 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,024 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,126 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,127 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,128 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,230 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,232 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,334 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,335 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,336 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,374 INFO    Thread-31 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:12:17,438 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,438 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,440 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,542 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,543 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,544 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,646 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,647 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,647 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:17,648 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,650 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 16:12:17,653 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 16:12:17,656 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 16:12:17,656 INFO    HandlerThread:7244 [handler.py:finish():638] shutting down handler
--2022-04-09 16:12:18,493 INFO    WriterThread:7244 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:12:18,647 INFO    SenderThread:7244 [sender.py:finish():933] shutting down sender
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:18,661 INFO    MainThread:7244 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 16:12:18,662 INFO    MainThread:7244 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 16:12:18,663 INFO    MainThread:7244 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 16:12:18,709 INFO    MainThread:7244 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug.log b/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-deleted file mode 100644
-index ad8f755..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-+++ /dev/null
-@@ -1,77 +0,0 @@
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug.log
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():418] starting backend
--2022-04-09 16:09:08,180 INFO    MainThread:7244 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
-diff --git a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb b/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
-deleted file mode 100644
-index b5995f1..0000000
-Binary files a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py b/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml b/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/config.yaml b/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/diff.patch b/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-deleted file mode 100644
-index aa6c773..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-+++ /dev/null
-@@ -1,528 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2aaecf9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,248 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..91bb884 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..252e468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c99b343 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_161421-3t82t88x
--\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/output.log b/wandb/run-20220409_161421-3t82t88x/files/output.log
-deleted file mode 100644
-index 3bf650b..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/output.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt b/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-deleted file mode 100644
-index f9df6f1..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:44:23.094487",
--    "startedAt": "2022-04-09T10:44:21.821617",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log b/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-deleted file mode 100644
-index 3f70132..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,831 DEBUG   MainThread:8815 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send():179] send: header
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:21,939 DEBUG   SenderThread:8815 [sender.py:send():179] send: run
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,090 DEBUG   SenderThread:8815 [sender.py:send():179] send: summary
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:23,092 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():39] meta init
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():53] meta init done
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:probe():210] probe
--2022-04-09 16:14:23,100 DEBUG   HandlerThread:8815 [meta.py:_setup_git():200] setup git
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_save_code():89] save code
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_code():110] save code done
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_patches():127] save patches
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_pip():57] save pip
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_conda():78] save conda
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,537 DEBUG   HandlerThread:8815 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:14:24,538 DEBUG   HandlerThread:8815 [meta.py:probe():252] probe done
--2022-04-09 16:14:24,539 DEBUG   SenderThread:8815 [sender.py:send():179] send: files
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,548 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:24,548 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:25,577 DEBUG   SenderThread:8815 [sender.py:send():179] send: config
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:40,579 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:40,579 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:51,743 DEBUG   SenderThread:8815 [sender.py:send():179] send: stats
--2022-04-09 16:14:56,424 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:56,424 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:15:01,820 DEBUG   SenderThread:8815 [sender.py:send():179] send: history
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug.log b/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-deleted file mode 100644
-index 99b6b97..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug.log
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():418] starting backend
--2022-04-09 16:14:21,828 INFO    MainThread:8815 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb b/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
-deleted file mode 100644
-index a4486ce..0000000
-Binary files a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py b/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml b/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/config.yaml b/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/diff.patch b/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-deleted file mode 100644
-index 9eddab1..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-+++ /dev/null
-@@ -1,560 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..353da1f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,249 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f0332eb 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..97853e9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7be71e2 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_162621-m83puhmm
--\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/output.log b/wandb/run-20220409_162621-m83puhmm/files/output.log
-deleted file mode 100644
-index ee1c9e3..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/output.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt b/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-deleted file mode 100644
-index 4ce8f76..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:56:22.902051",
--    "startedAt": "2022-04-09T10:56:21.924771",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log b/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-deleted file mode 100644
-index 7032449..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,934 DEBUG   MainThread:9280 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:26:21,937 DEBUG   SenderThread:9280 [sender.py:send():179] send: header
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:21,938 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,344 DEBUG   SenderThread:9280 [sender.py:send():179] send: run
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,889 DEBUG   SenderThread:9280 [sender.py:send():179] send: summary
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:22,895 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():39] meta init
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():53] meta init done
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:probe():210] probe
--2022-04-09 16:26:22,908 DEBUG   HandlerThread:9280 [meta.py:_setup_git():200] setup git
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_save_code():89] save code
--2022-04-09 16:26:22,972 DEBUG   HandlerThread:9280 [meta.py:_save_code():110] save code done
--2022-04-09 16:26:22,973 DEBUG   HandlerThread:9280 [meta.py:_save_patches():127] save patches
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():57] save pip
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_conda():78] save conda
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:probe():252] probe done
--2022-04-09 16:26:24,440 DEBUG   SenderThread:9280 [sender.py:send():179] send: files
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:24,448 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:24,898 DEBUG   SenderThread:9280 [sender.py:send():179] send: config
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:39,905 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:39,905 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:51,624 DEBUG   SenderThread:9280 [sender.py:send():179] send: stats
--2022-04-09 16:26:55,340 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:55,340 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:27:06,912 DEBUG   SenderThread:9280 [sender.py:send():179] send: history
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug.log b/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-deleted file mode 100644
-index 5053427..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():418] starting backend
--2022-04-09 16:26:21,931 INFO    MainThread:9280 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb b/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
-deleted file mode 100644
-index 978cbe5..0000000
-Binary files a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py b/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-deleted file mode 100644
-index 1988ff1..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 1
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 1
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch b/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-deleted file mode 100644
-index d503875..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-+++ /dev/null
-@@ -1,561 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..b0966e9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,250 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..1486dd6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..071678f 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..be8b91a 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf
--\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/output.log b/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-deleted file mode 100644
-index f4f17d5..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt b/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-deleted file mode 100644
-index 6c00633..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:09:01.944494",
--    "startedAt": "2022-04-09T12:09:01.199712",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-deleted file mode 100644
-index c0804b4..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5045.823547363281, "_runtime": 154, "_timestamp": 1649506295, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-deleted file mode 100644
-index 67f5897..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-+++ /dev/null
-@@ -1,418 +0,0 @@
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,208 DEBUG   MainThread:10760 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send():179] send: header
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,337 DEBUG   SenderThread:10760 [sender.py:send():179] send: run
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:01,942 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():39] meta init
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():53] meta init done
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:probe():210] probe
--2022-04-09 17:39:01,950 DEBUG   HandlerThread:10760 [meta.py:_setup_git():200] setup git
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_save_code():89] save code
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_code():110] save code done
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_patches():127] save patches
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():57] save pip
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_conda():78] save conda
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:probe():252] probe done
--2022-04-09 17:39:03,362 DEBUG   SenderThread:10760 [sender.py:send():179] send: files
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,372 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:03,372 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,822 DEBUG   SenderThread:10760 [sender.py:send():179] send: config
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:18,825 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:18,826 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:30,755 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:39:34,298 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:34,298 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:01,384 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:05,203 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:05,204 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,724 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:20,725 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,136 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:27,137 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:32,273 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:36,248 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:36,249 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:51,681 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:51,682 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:02,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,142 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:07,142 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:22,870 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:22,871 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:33,728 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,321 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:38,322 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: telemetry
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: exit
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():295] send defer
--2022-04-09 17:41:51,004 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,005 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,006 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,006 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 17:41:51,007 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,008 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,008 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 17:41:51,009 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 17:41:51,009 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,010 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 17:41:51,062 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,062 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:51,063 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,063 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 17:41:51,064 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,064 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 17:41:51,064 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 17:41:51,065 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,065 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 17:41:51,109 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,203 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:51,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:51,546 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 17:41:51,546 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,546 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,546 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,546 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 17:41:51,547 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 17:41:51,648 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt requirements.txt
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log output.log
--2022-04-09 17:41:52,208 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 17:41:52,209 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json wandb-summary.json
--2022-04-09 17:41:52,218 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml config.yaml
--2022-04-09 17:41:52,220 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch diff.patch
--2022-04-09 17:41:52,222 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py code/train_translation.py
--2022-04-09 17:41:52,224 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 17:41:52,224 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 17:41:52,225 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 17:41:52,225 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,226 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,226 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 17:41:52,328 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,842 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 17:41:52,842 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,844 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,844 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 17:41:52,845 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,846 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 17:41:52,848 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,848 DEBUG   SenderThread:10760 [sender.py:send():179] send: final
--2022-04-09 17:41:52,849 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 17:41:52,849 DEBUG   SenderThread:10760 [sender.py:send():179] send: footer
--2022-04-09 17:41:52,850 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,850 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 17:41:52,947 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,947 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,948 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,049 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,050 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,051 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 45730
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,153 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,153 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,155 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,256 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,257 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,258 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,360 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,361 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,362 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,464 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,465 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,466 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,502 INFO    Thread-33 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:53,504 INFO    Thread-29 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:41:53,512 INFO    Thread-32 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:53,524 INFO    Thread-31 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:41:53,568 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,568 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,569 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,671 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,672 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,673 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,775 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,776 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,777 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,879 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,879 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,881 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,983 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,983 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,984 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,033 INFO    Thread-30 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:54,086 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,087 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,088 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,190 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,190 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,192 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,294 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,294 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,294 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:54,295 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,297 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 17:41:54,299 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 17:41:54,302 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 17:41:54,302 INFO    HandlerThread:10760 [handler.py:finish():638] shutting down handler
--2022-04-09 17:41:54,849 INFO    WriterThread:10760 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [sender.py:finish():933] shutting down sender
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:55,308 INFO    MainThread:10760 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 17:41:55,309 INFO    MainThread:10760 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 17:41:55,310 INFO    MainThread:10760 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 17:41:55,323 INFO    MainThread:10760 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-deleted file mode 100644
-index 2ea4289..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-+++ /dev/null
-@@ -1,73 +0,0 @@
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():418] starting backend
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb b/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
-deleted file mode 100644
-index c939775..0000000
-Binary files a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py b/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml b/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/config.yaml b/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-deleted file mode 100644
-index 0b2ef04..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 24
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/diff.patch b/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-deleted file mode 100644
-index a6f8b6d..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-+++ /dev/null
-@@ -1,634 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e11eb21 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,302 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..a3e7597 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..453b7bc 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b2d6ded 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_175151-z44hpswp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/output.log b/wandb/run-20220409_175151-z44hpswp/files/output.log
-deleted file mode 100644
-index 2224687..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/output.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--translation model saved in checkpoint
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt b/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-deleted file mode 100644
-index e3bc5e0..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:21:52.829321",
--    "startedAt": "2022-04-09T12:21:51.786614",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=24",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-deleted file mode 100644
-index 4d8b4c3..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 107.22583770751953, "_runtime": 695, "_timestamp": 1649507606, "_step": 28, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log b/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-deleted file mode 100644
-index 552d2f2..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,620 +0,0 @@
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,796 DEBUG   MainThread:14720 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send():179] send: header
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,171 DEBUG   SenderThread:14720 [sender.py:send():179] send: run
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,825 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:52,827 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():39] meta init
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():53] meta init done
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:probe():210] probe
--2022-04-09 17:51:52,837 DEBUG   HandlerThread:14720 [meta.py:_setup_git():200] setup git
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_save_code():89] save code
--2022-04-09 17:51:52,876 DEBUG   HandlerThread:14720 [meta.py:_save_code():110] save code done
--2022-04-09 17:51:52,877 DEBUG   HandlerThread:14720 [meta.py:_save_patches():127] save patches
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():57] save pip
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_conda():78] save conda
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:probe():252] probe done
--2022-04-09 17:51:54,261 DEBUG   SenderThread:14720 [sender.py:send():179] send: files
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,272 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:51:54,272 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: config
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:09,721 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:09,721 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:21,569 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:25,148 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:25,149 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:52,213 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,140 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:56,140 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:11,596 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:11,597 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:23,054 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:27,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:27,074 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:42,499 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:42,500 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:53,596 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:57,929 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:57,929 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:59,413 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:59,414 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:13,359 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:13,359 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,344 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:54:20,345 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:24,527 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:28,793 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:28,793 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:44,227 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:44,227 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:55,062 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:59,653 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:59,653 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:11,338 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:11,339 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:15,098 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:15,099 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:25,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:30,519 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:30,519 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:45,955 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:45,956 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:56,468 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:01,589 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:17,078 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:17,078 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:27,343 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:56:32,522 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:32,522 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:47,961 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:47,961 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:57,925 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:03,390 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:03,390 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:18,853 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:18,853 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:28,552 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:34,280 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:34,280 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:49,734 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:49,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:59,325 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,341 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:05,342 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:20,790 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:20,790 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:29,955 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:36,214 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:36,214 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:51,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:51,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:00,845 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:07,147 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:07,147 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:22,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:22,588 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:31,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:38,008 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:38,008 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:53,449 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:53,450 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:02,140 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:08,884 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:08,884 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:13,617 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:13,618 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:24,366 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:24,367 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:32,786 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:39,806 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:39,806 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,224 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:55,225 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,715 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:00,716 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:03,610 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:10,649 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:10,649 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:26,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:26,073 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:34,217 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:41,491 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:41,492 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,993 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:43,994 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:56,918 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:56,918 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:04,763 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:12,340 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:12,340 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:35,408 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:43,201 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:43,201 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:44,434 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:44,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:58,647 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:58,647 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:06,291 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:14,117 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:14,117 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,051 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:26,052 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:29,557 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:29,559 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:36,939 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:03:42,324 INFO    MainThread:14720 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:03:43,079 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:43,080 DEBUG   SenderThread:14720 [sender.py:send():179] send: telemetry
--2022-04-09 18:03:43,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:43,580 DEBUG   SenderThread:14720 [sender.py:send():179] send: exit
--2022-04-09 18:03:43,580 INFO    SenderThread:14720 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:03:43,581 INFO    SenderThread:14720 [sender.py:send_exit():295] send defer
--2022-04-09 18:03:43,581 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:43,582 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,583 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:03:43,583 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:03:43,584 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 48639
--}
--
--2022-04-09 18:03:43,585 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,586 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:03:43,657 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,657 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:43,658 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,658 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:03:43,660 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,660 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:03:43,686 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:44,248 INFO    SenderThread:14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt requirements.txt
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log output.log
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml config.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch diff.patch
--2022-04-09 18:03:44,251 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:03:44,253 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:03:44,253 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,254 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,258 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:03:44,260 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,260 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:03:44,261 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,261 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:03:44,261 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,261 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:03:44,361 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,907 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:03:44,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,908 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,908 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:03:44,909 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,909 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:03:44,910 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,910 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: final
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: footer
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,911 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:03:45,010 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,011 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,012 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,115 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,116 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,117 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,219 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,219 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,221 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,323 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,323 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,325 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,427 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,427 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,428 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,466 INFO    Thread-54 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 18:03:45,472 INFO    Thread-52 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 18:03:45,476 INFO    Thread-53 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:45,530 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,531 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,532 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,636 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,738 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,739 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,740 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,842 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,842 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,844 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,946 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,946 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,948 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,050 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,051 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,053 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,155 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,156 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,157 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,184 INFO    Thread-56 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:46,188 INFO    Thread-55 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:46,259 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,259 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,261 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,363 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,364 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,365 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,468 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,469 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,469 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:46,470 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,472 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:03:46,474 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:03:46,477 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:03:46,478 INFO    HandlerThread:14720 [handler.py:finish():638] shutting down handler
--2022-04-09 18:03:46,911 INFO    WriterThread:14720 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 18:03:47,469 INFO    SenderThread:14720 [sender.py:finish():933] shutting down sender
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:47,483 INFO    MainThread:14720 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:03:47,484 INFO    MainThread:14720 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:03:47,485 INFO    MainThread:14720 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:03:47,525 INFO    MainThread:14720 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug.log b/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-deleted file mode 100644
-index bb769fe..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-+++ /dev/null
-@@ -1,140 +0,0 @@
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'z44hpswp', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-z44hpswp.yaml', 'start_method': 'thread'}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug.log
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 24, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():418] starting backend
--2022-04-09 17:51:51,793 INFO    MainThread:14720 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
-diff --git a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb b/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
-deleted file mode 100644
-index 55f1aff..0000000
-Binary files a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py b/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml b/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml b/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-deleted file mode 100644
-index 194d831..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch b/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-deleted file mode 100644
-index 979dcc5..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-+++ /dev/null
-@@ -1,645 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..42fbde8 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,313 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..371ace5 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..a6d9884 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..705068b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z
--\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/output.log b/wandb/run-20220409_180353-vjrenr4z/files/output.log
-deleted file mode 100644
-index a2bf91c..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/output.log
-+++ /dev/null
-@@ -1,102 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--translation model saved in checkpoint
--{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--translation model saved in checkpoint
--{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--translation model saved in checkpoint
--{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--translation model saved in checkpoint
--{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--translation model saved in checkpoint
--{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--translation model saved in checkpoint
--{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--translation model saved in checkpoint
--{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--translation model saved in checkpoint
--{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--translation model saved in checkpoint
--{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--translation model saved in checkpoint
--{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--translation model saved in checkpoint
--{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--translation model saved in checkpoint
--{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--translation model saved in checkpoint
--{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--translation model saved in checkpoint
--{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--translation model saved in checkpoint
--{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--translation model saved in checkpoint
--{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--translation model saved in checkpoint
--{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--translation model saved in checkpoint
--{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--translation model saved in checkpoint
--{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--translation model saved in checkpoint
--{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--translation model saved in checkpoint
--{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--translation model saved in checkpoint
--{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--translation model saved in checkpoint
--{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--translation model saved in checkpoint
--{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--translation model saved in checkpoint
--{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--translation model saved in checkpoint
--{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--translation model saved in checkpoint
--{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--translation model saved in checkpoint
--{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--translation model saved in checkpoint
--{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--translation model saved in checkpoint
--{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--translation model saved in checkpoint
--{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--translation model saved in checkpoint
--{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--translation model saved in checkpoint
--{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--translation model saved in checkpoint
--{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--translation model saved in checkpoint
--{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt b/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-deleted file mode 100644
-index 3e24107..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:33:55.138080",
--    "startedAt": "2022-04-09T12:33:53.912960",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=40",
--        "--nhead=4",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-deleted file mode 100644
-index dbd5bb9..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 571.8498382568359, "_runtime": 1394, "_timestamp": 1649509027, "_step": 47, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-deleted file mode 100644
-index 6ac5722..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,809 +0,0 @@
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,947 DEBUG   MainThread:18842 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 DEBUG   SenderThread:18842 [sender.py:send():179] send: header
--2022-04-09 18:03:53,957 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:53,958 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:54,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: run
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:55,130 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():39] meta init
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():53] meta init done
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:probe():210] probe
--2022-04-09 18:03:55,146 DEBUG   HandlerThread:18842 [meta.py:_setup_git():200] setup git
--2022-04-09 18:03:55,213 DEBUG   HandlerThread:18842 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:03:55,214 DEBUG   HandlerThread:18842 [meta.py:_save_code():89] save code
--2022-04-09 18:03:55,241 DEBUG   HandlerThread:18842 [meta.py:_save_code():110] save code done
--2022-04-09 18:03:55,242 DEBUG   HandlerThread:18842 [meta.py:_save_patches():127] save patches
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():57] save pip
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_conda():78] save conda
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,710 DEBUG   HandlerThread:18842 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:03:56,711 DEBUG   HandlerThread:18842 [meta.py:probe():252] probe done
--2022-04-09 18:03:56,713 DEBUG   SenderThread:18842 [sender.py:send():179] send: files
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,723 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:56,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: config
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:12,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:12,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:23,959 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:27,637 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:27,637 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:43,070 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:43,071 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:54,578 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:58,609 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:58,609 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,096 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:14,096 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:25,318 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:05:29,536 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:29,536 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,041 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:45,042 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:55,878 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:00,385 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:00,385 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,115 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:12,116 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:15,812 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:15,812 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:26,509 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:31,252 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:31,252 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:46,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:46,699 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:57,088 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:02,128 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:02,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:17,560 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:17,560 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:27,788 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:33,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:33,039 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:58,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:03,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:03,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:10,495 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:10,496 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,773 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:16,774 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:19,358 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:19,358 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:29,127 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:34,827 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:34,827 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:50,258 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:50,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:59,791 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:05,625 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:05,625 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:21,079 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:21,079 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:30,544 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:36,425 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:36,426 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,629 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:37,630 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:51,758 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:51,758 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:01,192 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:07,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:22,576 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:22,576 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,752 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:37,928 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:37,928 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:02,406 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:08,610 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:08,610 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:23,966 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:23,966 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:33,001 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:39,600 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:39,600 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:54,944 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:54,944 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:03,627 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:10,280 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:10,280 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:25,635 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:25,635 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:34,297 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:40,989 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:40,989 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:56,322 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:56,323 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:05,226 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:11,687 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:11,687 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:27,035 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:27,035 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:35,749 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:42,474 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:42,475 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:57,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:06,507 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:13,240 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:13,240 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,985 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:26,986 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:28,667 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:28,668 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:37,148 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:44,310 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:44,310 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:59,666 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:59,666 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:07,695 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:14,998 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:14,998 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:30,334 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:30,334 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:38,429 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:45,673 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:45,673 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:01,020 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:01,020 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:09,031 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:16,349 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:16,349 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:39,689 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:47,261 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:47,261 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:02,605 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:02,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:10,351 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:17,935 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:17,935 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:33,308 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:33,308 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,998 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:44,097 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:44,098 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:48,657 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:48,817 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:11,869 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:20,065 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:20,065 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,258 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:50,780 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:50,780 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:06,176 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:06,176 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:12,884 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:21,533 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:21,533 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:43,542 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:52,222 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:52,222 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:07,575 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:07,575 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:14,395 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:22,919 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:22,920 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:38,284 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:38,284 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:44,947 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:53,719 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:53,719 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:09,154 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:09,154 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:15,554 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:24,513 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:24,513 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,048 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:32,049 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:39,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:39,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:46,176 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:55,292 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:55,292 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:10,678 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:10,679 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:16,761 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:26,337 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:26,337 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:41,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:41,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:43,842 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:43,843 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:47,574 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:57,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:57,038 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:12,473 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:12,473 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:18,151 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:27,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:27,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:43,266 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:43,266 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:48,907 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:58,729 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:58,729 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,447 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:03,448 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:14,167 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:14,167 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:19,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:29,519 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:29,520 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:44,877 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:44,877 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:50,128 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:00,259 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:00,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:20,792 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:30,948 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:30,948 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,976 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:38,977 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:46,374 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:46,374 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:51,548 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:01,722 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:01,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:03,261 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:03,262 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:17,072 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:17,072 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:22,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:26:32,410 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:32,411 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:47,810 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:47,810 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:52,753 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,241 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:03,241 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:18,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:18,700 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:23,342 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:34,106 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:34,107 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
--2022-04-09 18:27:39,696 INFO    MainThread:18842 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:27:39,697 INFO    MainThread:18842 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:27:40,003 DEBUG   SenderThread:18842 [sender.py:send():179] send: telemetry
--2022-04-09 18:27:40,004 DEBUG   SenderThread:18842 [sender.py:send():179] send: exit
--2022-04-09 18:27:40,005 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,005 INFO    SenderThread:18842 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:27:40,006 INFO    SenderThread:18842 [sender.py:send_exit():295] send defer
--2022-04-09 18:27:40,006 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,008 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,008 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:27:40,008 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,010 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:27:40,011 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,011 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:40,067 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,067 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:27:40,069 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,069 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:27:40,110 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:40,461 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:27:40,462 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,463 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,464 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:27:40,464 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,465 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,465 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:27:40,466 INFO    SenderThread:18842 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:27:40,566 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:41,202 INFO    SenderThread:18842 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:27:41,205 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt requirements.txt
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log output.log
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:27:41,207 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml config.yaml
--2022-04-09 18:27:41,211 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch diff.patch
--2022-04-09 18:27:41,220 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:27:41,223 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:27:41,224 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,225 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,225 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:27:41,225 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,226 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,226 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:27:41,230 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:41,231 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:27:41,232 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,232 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:27:41,232 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,232 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:27:41,332 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,915 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:27:41,915 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,917 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,917 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:27:41,918 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,919 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:27:41,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,921 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:27:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: final
--2022-04-09 18:27:41,922 DEBUG   SenderThread:18842 [sender.py:send():179] send: footer
--2022-04-09 18:27:41,923 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,923 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:27:42,024 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,024 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,025 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,127 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,129 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,231 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,231 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,233 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,335 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,335 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,336 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,438 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,439 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,440 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,542 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,542 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,544 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,592 INFO    Thread-73 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:27:42,594 INFO    Thread-71 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:27:42,599 INFO    Thread-75 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:42,601 INFO    Thread-72 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:42,602 INFO    Thread-74 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:42,645 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,645 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,646 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,747 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,748 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,749 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,851 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,851 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,852 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:42,853 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,855 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:27:42,857 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:27:42,860 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:27:42,861 INFO    HandlerThread:18842 [handler.py:finish():638] shutting down handler
--2022-04-09 18:27:42,922 INFO    WriterThread:18842 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:27:43,852 INFO    SenderThread:18842 [sender.py:finish():933] shutting down sender
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:27:43,868 INFO    MainThread:18842 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:27:43,884 INFO    MainThread:18842 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-deleted file mode 100644
-index 55b000f..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-+++ /dev/null
-@@ -1,230 +0,0 @@
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'vjrenr4z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml', 'start_method': 'thread'}
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:53,921 INFO    MainThread:18842 [wandb_init.py:init():418] starting backend
--2022-04-09 18:03:53,941 INFO    MainThread:18842 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:03:53,943 INFO    MainThread:18842 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
-diff --git a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb b/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
-deleted file mode 100644
-index 2a205f7..0000000
-Binary files a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py b/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml b/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_182749-paufev36/files/config.yaml b/wandb/run-20220409_182749-paufev36/files/config.yaml
-deleted file mode 100644
-index c4a0d20..0000000
---- a/wandb/run-20220409_182749-paufev36/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_182749-paufev36/files/diff.patch b/wandb/run-20220409_182749-paufev36/files/diff.patch
-deleted file mode 100644
-index 17f6c34..0000000
---- a/wandb/run-20220409_182749-paufev36/files/diff.patch
-+++ /dev/null
-@@ -1,694 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e8bd4e3 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,362 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--+{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--+{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--+{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--+{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--+{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--+{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--+{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--+{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--+{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--+{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--+{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--+{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--+{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--+{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--+{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--+{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--+{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--+{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--+{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--+{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--+{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--+{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--+{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--+{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--+{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--+{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--+{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--+{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--+{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--+{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--+{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--+{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--+{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--+{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--+{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--+{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--+{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--+{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--+{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--+{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--+{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--+{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--+{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--+{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--+{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--+{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..6163657 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..7d0f5dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f11d588 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_182749-paufev36
--\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/output.log b/wandb/run-20220409_182749-paufev36/files/output.log
-deleted file mode 100644
-index 8a30e30..0000000
---- a/wandb/run-20220409_182749-paufev36/files/output.log
-+++ /dev/null
-@@ -1,55 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.115720272064209, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 202.97476196289062, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 151.204345703125, "time": 62}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Exception in thread Thread-16:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220409_182749-paufev36/files/requirements.txt b/wandb/run-20220409_182749-paufev36/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_182749-paufev36/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json b/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-deleted file mode 100644
-index ee6c1fa..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:57:50.039943",
--    "startedAt": "2022-04-09T12:57:49.399103",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json b/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-deleted file mode 100644
-index 6be8521..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 287.689208984375, "_runtime": 137, "_timestamp": 1649509206, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log b/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-deleted file mode 100644
-index ade12de..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-+++ /dev/null
-@@ -1,141 +0,0 @@
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,431 DEBUG   MainThread:25755 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send():179] send: header
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,435 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:49,589 DEBUG   SenderThread:25755 [sender.py:send():179] send: run
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:50,037 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():39] meta init
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():53] meta init done
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:probe():210] probe
--2022-04-09 18:27:50,045 DEBUG   HandlerThread:25755 [meta.py:_setup_git():200] setup git
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_save_code():89] save code
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_code():110] save code done
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_patches():127] save patches
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_pip():57] save pip
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_conda():78] save conda
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:probe():252] probe done
--2022-04-09 18:27:51,519 DEBUG   SenderThread:25755 [sender.py:send():179] send: files
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,530 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:51,530 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:51,872 DEBUG   SenderThread:25755 [sender.py:send():179] send: config
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:06,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:18,996 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,208 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:22,208 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:49,672 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:53,002 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:53,002 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,936 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:00,937 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:08,453 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:08,454 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:20,345 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:23,787 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:23,787 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:39,186 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:39,186 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:51,270 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:54,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:54,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:10,343 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:30:10,343 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug.log b/wandb/run-20220409_182749-paufev36/logs/debug.log
-deleted file mode 100644
-index 7b0f79c..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug.log
-+++ /dev/null
-@@ -1,92 +0,0 @@
--2022-04-09 18:27:49,403 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'paufev36', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-paufev36.yaml', 'start_method': 'thread'}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():418] starting backend
--2022-04-09 18:27:49,427 INFO    MainThread:25755 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:27:49,429 INFO    MainThread:25755 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb b/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
-deleted file mode 100644
-index 70babdb..0000000
-Binary files a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb and /dev/null differ
-diff --git a/wandb/sweep-1t9pc38r/config-paufev36.yaml b/wandb/sweep-1t9pc38r/config-paufev36.yaml
-deleted file mode 100644
-index da3e8b2..0000000
---- a/wandb/sweep-1t9pc38r/config-paufev36.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml b/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-deleted file mode 100644
-index d68afea..0000000
---- a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml b/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-deleted file mode 100644
-index cc3235e..0000000
---- a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml b/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-deleted file mode 100644
-index 24fc0f6..0000000
---- a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml b/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-deleted file mode 100644
-index eeb3936..0000000
---- a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml b/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-deleted file mode 100644
-index f88591e..0000000
---- a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-abict4v2.yaml b/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-deleted file mode 100644
-index 1b97c5e..0000000
---- a/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 20
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml b/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-deleted file mode 100644
-index 426c8ac..0000000
---- a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml b/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-deleted file mode 100644
-index caf5f78..0000000
---- a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml b/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-deleted file mode 100644
-index 6b7d3c1..0000000
---- a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml b/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-deleted file mode 100644
-index 8f11b7e..0000000
---- a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml b/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-deleted file mode 100644
-index d3a2560..0000000
---- a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml b/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-deleted file mode 100644
-index 403014d..0000000
---- a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 512
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml b/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-deleted file mode 100644
-index d1bf3d8..0000000
---- a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 40
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml b/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-deleted file mode 100644
-index 258ae0c..0000000
---- a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml b/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-deleted file mode 100644
-index dbe827a..0000000
---- a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml b/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-deleted file mode 100644
-index 3aeb285..0000000
---- a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml b/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-deleted file mode 100644
-index ccb6734..0000000
---- a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-gjih072d.yaml b/wandb/sweep-yoroy32u/config-gjih072d.yaml
-deleted file mode 100644
-index 73e8e4c..0000000
---- a/wandb/sweep-yoroy32u/config-gjih072d.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml b/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-deleted file mode 100644
-index 9d822c0..0000000
---- a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml b/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-deleted file mode 100644
-index f0bd5df..0000000
---- a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-uh7twoim.yaml b/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-deleted file mode 100644
-index 508d9e2..0000000
---- a/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml b/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-deleted file mode 100644
-index 83311a7..0000000
---- a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml b/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-deleted file mode 100644
-index 4f6dc35..0000000
---- a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--lambd:
--  value: 0.4
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-padai7jf.yaml b/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-deleted file mode 100644
-index 9b19315..0000000
---- a/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--lambd:
--  value: 0.55
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml b/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-deleted file mode 100644
-index 8a8a9b2..0000000
---- a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 256
--epochs:
--  value: 24
--lambd:
--  value: 0.2
--nhead:
--  value: 2
--nlayers:
--  value: 4
diff --git a/wandb/run-20220415_193521-231emzap/files/output.log b/wandb/run-20220415_193521-231emzap/files/output.log
deleted file mode 100644
index 301455d..0000000
--- a/wandb/run-20220415_193521-231emzap/files/output.log
+++ /dev/null
@@ -1,77 +0,0 @@
-
-train_translation.py
-Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
-Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
-- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 28}
-/home/ivlabs/context_enhancement/context_new/new/context_enhancement/train_translation.py:275: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
-  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 155}
-{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 281}
-{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 405}
-{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 530}
-{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 657}
-{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 783}
-{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 908}
-{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 1033}
-translation model saved in checkpoint
-Exception in thread Thread-3:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
-    self.run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
-    self._target(*self._args, **self._kwargs)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
-    msg = self._response_queue.get(timeout=1)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
-    res = self._recv_bytes()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
-    buf = self._recv_bytes(maxlength)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
-    buf = self._recv(4)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
-    raise EOFError
-EOFError
-Thread HandlerThread:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
-    self._run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
-    record = self._input_record_q.get(timeout=1)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
-    res = self._recv_bytes()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
-    buf = self._recv_bytes(maxlength)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
-    buf = self._recv(4)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
-    raise EOFError
-EOFError
-[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
-Exception in thread Thread-15:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
-    self.run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
-    self._target(*self._args, **self._kwargs)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
-    status_response = self._interface.communicate_stop_status()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
-    resp = self._communicate(req, timeout=timeout, local=True)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
-    return self._communicate_async(rec, local=local).get(timeout=timeout)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
-    raise Exception("The wandb backend process has shutdown")
-Exception: The wandb backend process has shutdown
-Traceback (most recent call last):
-  File "<string>", line 1, in <module>
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
-    exitcode = _main(fd)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
-    return self._bootstrap()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
-    threading._shutdown()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
-    lock.acquire()
-KeyboardInterrupt
\ No newline at end of file
diff --git a/wandb/run-20220415_193521-231emzap/files/requirements.txt b/wandb/run-20220415_193521-231emzap/files/requirements.txt
deleted file mode 100644
index 5ddce70..0000000
--- a/wandb/run-20220415_193521-231emzap/files/requirements.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-aiohttp==3.8.1
-aiosignal==1.2.0
-antlr4-python3-runtime==4.8
-async-timeout==4.0.2
-asynctest==0.13.0
-attrs==21.4.0
-backcall==0.2.0
-bitarray==2.4.1
-blessings==1.7
-brotlipy==0.7.0
-certifi==2021.10.8
-cffi==1.15.0
-charset-normalizer==2.0.12
-click==8.0.4
-colorama==0.4.4
-configparser==5.2.0
-cryptography==36.0.0
-cython==0.29.28
-datasets==1.16.1
-debugpy==1.6.0
-decorator==5.1.1
-dill==0.3.4
-docker-pycreds==0.4.0
-entrypoints==0.4
-fairseq==1.0.0a0
-fastbpe==0.1.0
-filelock==3.6.0
-frozenlist==1.3.0
-fsspec==2022.2.0
-gitdb==4.0.9
-gitpython==3.1.27
-gpustat==0.6.0
-huggingface-hub==0.4.0
-hydra-core==1.0.7
-idna==3.3
-importlib-metadata==4.11.3
-importlib-resources==5.6.0
-ipykernel==6.12.1
-ipython==7.32.0
-jedi==0.18.1
-joblib==1.1.0
-jupyter-client==7.2.2
-jupyter-core==4.9.2
-matplotlib-inline==0.1.3
-mkl-fft==1.3.1
-mkl-random==1.2.2
-mkl-service==2.4.0
-mock==4.0.3
-multidict==6.0.2
-multiprocess==0.70.12.2
-nest-asyncio==1.5.5
-numpy==1.21.5
-nvidia-ml-py3==7.352.0
-omegaconf==2.0.6
-packaging==21.3
-pandas==1.3.5
-parso==0.8.3
-pathtools==0.1.2
-pexpect==4.8.0
-pickleshare==0.7.5
-pillow==9.0.1
-pip==21.2.2
-portalocker==2.4.0
-promise==2.3
-prompt-toolkit==3.0.29
-protobuf==3.19.4
-psutil==5.9.0
-ptyprocess==0.7.0
-pyarrow==7.0.0
-pycparser==2.21
-pygments==2.11.2
-pyopenssl==22.0.0
-pyparsing==3.0.7
-pysocks==1.7.1
-python-dateutil==2.8.2
-pytz==2022.1
-pyyaml==6.0
-pyzmq==22.3.0
-regex==2022.3.15
-requests==2.27.1
-sacrebleu==2.0.0
-sacremoses==0.0.49
-sentry-sdk==1.5.8
-setuptools==58.0.4
-shortuuid==1.0.8
-six==1.16.0
-smmap==5.0.0
-subprocess32==3.5.4
-subword-nmt==0.3.8
-tabulate==0.8.9
-tokenizers==0.10.3
-torch==1.11.0
-torchaudio==0.11.0
-torchtext==0.12.0
-torchvision==0.12.0
-tornado==6.1
-tqdm==4.63.1
-traitlets==5.1.1
-transformers==4.14.1
-typing-extensions==4.1.1
-urllib3==1.26.9
-wandb==0.10.31
-wcwidth==0.2.5
-wheel==0.37.1
-xxhash==3.0.0
-yarl==1.7.2
-zipp==3.7.0
\ No newline at end of file
diff --git a/wandb/run-20220415_193521-231emzap/files/wandb-metadata.json b/wandb/run-20220415_193521-231emzap/files/wandb-metadata.json
deleted file mode 100644
index 02e1ef7..0000000
--- a/wandb/run-20220415_193521-231emzap/files/wandb-metadata.json
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
-    "python": "3.7.11",
-    "heartbeatAt": "2022-04-15T14:05:22.557883",
-    "startedAt": "2022-04-15T14:05:21.616163",
-    "docker": null,
-    "gpu": "NVIDIA GeForce GTX 1080 Ti",
-    "gpu_count": 2,
-    "cpu_count": 8,
-    "cuda": null,
-    "args": [],
-    "state": "running",
-    "program": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement/train_translation.py",
-    "codePath": "train_translation.py",
-    "git": {
-        "remote": "https://github.com/IvLabs/context_enhancement.git",
-        "commit": "3f7c03274d50f816db3079adcb4d4125620373b6"
-    },
-    "email": "aneeshashetye@gmail.com",
-    "root": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement",
-    "host": "hubble-02",
-    "username": "ivlabs",
-    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
-}
diff --git a/wandb/run-20220415_193521-231emzap/files/wandb-summary.json b/wandb/run-20220415_193521-231emzap/files/wandb-summary.json
deleted file mode 100644
index 3c99905..0000000
--- a/wandb/run-20220415_193521-231emzap/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"epoch_loss": 103.21329364776611, "_runtime": 1149, "_timestamp": 1650032670, "_step": 0}
\ No newline at end of file
diff --git a/wandb/run-20220415_193521-231emzap/logs/debug-internal.log b/wandb/run-20220415_193521-231emzap/logs/debug-internal.log
deleted file mode 100644
index 88e8878..0000000
--- a/wandb/run-20220415_193521-231emzap/logs/debug-internal.log
+++ /dev/null
@@ -1,302 +0,0 @@
-2022-04-15 19:35:21,654 INFO    wandb_internal:6227 [internal.py:wandb_internal():91] W&B internal server running at pid: 6227, started at: 2022-04-15 19:35:21.641638
-2022-04-15 19:35:21,661 INFO    MainThread:6227 [wandb_init.py:init():423] backend started and connected
-2022-04-15 19:35:21,661 DEBUG   MainThread:6227 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
-2022-04-15 19:35:21,663 INFO    MainThread:6227 [wandb_init.py:init():465] updated telemetry
-2022-04-15 19:35:21,677 INFO    MainThread:6227 [wandb_init.py:init():484] communicating current version
-2022-04-15 19:35:21,707 DEBUG   SenderThread:6227 [sender.py:send():179] send: header
-2022-04-15 19:35:21,705 INFO    WriterThread:6227 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/run-231emzap.wandb
-2022-04-15 19:35:21,707 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: check_version
-2022-04-15 19:35:21,707 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: check_version
-2022-04-15 19:35:21,919 INFO    MainThread:6227 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-15 19:35:21,919 INFO    MainThread:6227 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-15 19:35:21,929 DEBUG   SenderThread:6227 [sender.py:send():179] send: run
-2022-04-15 19:35:22,542 INFO    MainThread:6227 [wandb_init.py:init():522] starting run threads in backend
-2022-04-15 19:35:22,543 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: run_start
-2022-04-15 19:35:22,557 DEBUG   HandlerThread:6227 [meta.py:__init__():39] meta init
-2022-04-15 19:35:22,557 DEBUG   HandlerThread:6227 [meta.py:__init__():53] meta init done
-2022-04-15 19:35:22,557 DEBUG   HandlerThread:6227 [meta.py:probe():210] probe
-2022-04-15 19:35:22,564 DEBUG   HandlerThread:6227 [meta.py:_setup_git():200] setup git
-2022-04-15 19:35:22,618 INFO    SenderThread:6227 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files
-2022-04-15 19:35:22,618 INFO    SenderThread:6227 [sender.py:_start_run_threads():707] run started: 231emzap with start time 1650031521
-2022-04-15 19:35:22,618 DEBUG   SenderThread:6227 [sender.py:send():179] send: summary
-2022-04-15 19:35:22,619 INFO    SenderThread:6227 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-15 19:35:22,641 DEBUG   HandlerThread:6227 [meta.py:_setup_git():207] setup git done
-2022-04-15 19:35:22,641 DEBUG   HandlerThread:6227 [meta.py:_save_code():89] save code
-2022-04-15 19:35:22,667 DEBUG   HandlerThread:6227 [meta.py:_save_code():110] save code done
-2022-04-15 19:35:22,668 DEBUG   HandlerThread:6227 [meta.py:_save_patches():127] save patches
-2022-04-15 19:35:22,900 DEBUG   HandlerThread:6227 [meta.py:_save_patches():169] save patches done
-2022-04-15 19:35:22,900 DEBUG   HandlerThread:6227 [meta.py:_save_pip():57] save pip
-2022-04-15 19:35:22,900 DEBUG   HandlerThread:6227 [meta.py:_save_pip():71] save pip done
-2022-04-15 19:35:22,901 DEBUG   HandlerThread:6227 [meta.py:_save_conda():78] save conda
-2022-04-15 19:35:23,604 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/code/train_translation.py
-2022-04-15 19:35:23,604 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/conda-environment.yaml
-2022-04-15 19:35:23,604 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/requirements.txt
-2022-04-15 19:35:23,604 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/diff.patch
-2022-04-15 19:35:23,604 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/wandb-summary.json
-2022-04-15 19:35:23,605 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/code
-2022-04-15 19:35:26,867 DEBUG   HandlerThread:6227 [meta.py:_save_conda():86] save conda done
-2022-04-15 19:35:26,867 DEBUG   HandlerThread:6227 [meta.py:probe():252] probe done
-2022-04-15 19:35:26,874 DEBUG   SenderThread:6227 [sender.py:send():179] send: files
-2022-04-15 19:35:26,874 INFO    SenderThread:6227 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-15 19:35:26,875 INFO    SenderThread:6227 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-15 19:35:26,875 INFO    SenderThread:6227 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-15 19:35:26,897 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:35:26,897 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:35:26,909 INFO    MainThread:6227 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-15 19:35:26,913 INFO    MainThread:6227 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-15 19:35:26,913 INFO    MainThread:6227 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-15 19:35:26,967 INFO    MainThread:6227 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-15 19:35:26,967 INFO    MainThread:6227 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-15 19:35:26,968 INFO    MainThread:6227 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-15 19:35:27,399 DEBUG   SenderThread:6227 [sender.py:send():179] send: config
-2022-04-15 19:35:27,603 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/conda-environment.yaml
-2022-04-15 19:35:27,603 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/wandb-metadata.json
-2022-04-15 19:35:27,604 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:35:28,289 INFO    Thread-14 :6227 [upload_job.py:push():133] Uploaded file /tmp/tmpit2mxldiwandb/3l2un8y7-wandb-metadata.json
-2022-04-15 19:35:28,309 INFO    Thread-16 :6227 [upload_job.py:push():133] Uploaded file /tmp/tmpit2mxldiwandb/2642x5u1-code/train_translation.py
-2022-04-15 19:35:29,248 INFO    Thread-18 :6227 [upload_job.py:push():133] Uploaded file /tmp/tmpit2mxldiwandb/3w1kgl5c-diff.patch
-2022-04-15 19:35:29,611 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/config.yaml
-2022-04-15 19:35:29,611 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:35:33,612 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:35:42,401 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:35:42,401 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:35:51,746 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:35:57,983 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:35:57,983 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:36:07,625 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:36:09,626 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:36:14,234 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:36:14,234 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:36:22,435 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:36:29,784 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:36:29,784 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:36:45,336 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:36:45,336 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:36:53,115 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:37:00,854 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:37:00,854 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:37:16,412 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:37:16,412 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:37:23,775 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:37:31,915 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:37:31,915 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:37:47,561 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:37:47,562 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:37:54,451 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:38:03,052 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:38:03,052 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:38:15,668 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:38:18,601 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:38:18,601 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:38:25,150 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:38:34,137 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:38:34,138 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:38:49,657 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:38:49,657 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:38:55,860 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:39:05,152 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:39:05,153 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:39:20,952 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:39:20,952 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:39:26,548 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:39:36,524 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:39:36,524 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:39:52,137 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:39:52,138 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:39:57,181 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:40:07,658 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:40:07,658 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:40:21,704 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:40:23,266 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:40:23,267 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:40:27,854 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:40:38,901 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:40:38,901 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:40:54,413 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:40:54,413 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:40:58,482 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:41:09,931 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:41:09,931 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:41:25,494 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:41:25,494 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:41:29,163 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:41:41,013 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:41:41,013 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:41:56,570 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:41:56,570 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:41:59,758 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:42:12,095 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:42:12,095 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:42:25,749 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:42:27,592 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:42:27,592 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:42:30,434 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:42:43,166 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:42:43,166 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:42:58,751 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:42:58,751 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:43:01,144 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:43:14,279 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:43:14,280 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:43:29,854 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:43:29,855 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:43:31,764 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:43:45,403 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:43:45,403 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:44:00,964 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:44:00,965 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:44:02,446 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:44:17,234 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:44:17,234 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:44:29,793 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:44:32,848 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:44:32,848 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:44:33,426 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:44:48,428 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:44:48,428 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:45:03,865 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:45:04,062 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:45:04,062 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:45:19,623 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:45:19,623 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:45:34,533 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:45:35,138 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:45:35,139 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:45:50,645 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:45:50,645 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:46:05,254 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:46:06,221 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:46:06,221 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:46:21,766 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:46:21,766 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:46:35,925 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:46:37,397 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:46:37,397 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:46:37,828 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:46:52,955 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:46:52,955 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:47:06,616 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:47:08,555 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:47:08,555 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:47:24,064 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:47:24,064 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:47:37,263 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:47:39,645 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:47:39,646 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:47:55,185 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:47:55,185 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:48:07,887 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:48:10,707 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:48:10,707 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:48:26,268 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:48:26,269 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:48:38,517 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:48:41,854 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:48:41,854 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:48:43,870 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:48:57,355 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:48:57,355 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:49:09,161 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:49:13,066 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:49:13,066 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:49:28,650 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:49:28,651 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:49:39,887 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:49:44,321 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:49:44,322 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:49:59,888 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:49:59,888 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:50:10,589 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:50:15,420 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:50:15,421 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:50:30,986 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:50:30,987 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:50:41,331 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:50:46,616 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:50:46,617 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:50:47,905 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:51:02,176 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:51:02,176 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:51:12,008 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:51:17,725 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:51:17,725 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:51:33,508 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:51:33,508 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:51:42,613 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:51:49,212 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:51:49,212 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:52:04,733 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:52:04,733 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:52:13,263 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:52:20,327 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:52:20,327 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:52:35,877 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:52:35,877 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:52:43,808 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:52:51,414 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:52:51,414 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:52:54,940 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:53:07,030 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:53:07,030 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:53:14,500 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:53:22,649 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:53:22,650 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:53:38,185 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:53:38,185 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:53:45,170 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:53:53,780 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:53:53,780 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:54:09,368 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:54:09,368 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:54:15,790 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:54:24,949 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:54:24,949 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:54:30,772 DEBUG   SenderThread:6227 [sender.py:send():179] send: history
-2022-04-15 19:54:30,772 DEBUG   SenderThread:6227 [sender.py:send():179] send: summary
-2022-04-15 19:54:30,772 INFO    SenderThread:6227 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-15 19:54:30,966 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/wandb-summary.json
-2022-04-15 19:54:40,527 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:54:40,528 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:54:46,854 DEBUG   SenderThread:6227 [sender.py:send():179] send: stats
-2022-04-15 19:54:48,284 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:54:56,050 DEBUG   HandlerThread:6227 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 19:54:56,050 DEBUG   SenderThread:6227 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 19:54:58,727 INFO    SenderThread:6227 [sender.py:finish():933] shutting down sender
-2022-04-15 19:54:58,727 INFO    SenderThread:6227 [dir_watcher.py:finish():282] shutting down directory watcher
-2022-04-15 19:54:59,092 INFO    WriterThread:6227 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/run-231emzap.wandb
-2022-04-15 19:54:59,343 INFO    SenderThread:6227 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files
-2022-04-15 19:54:59,343 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/requirements.txt requirements.txt
-2022-04-15 19:54:59,343 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/wandb-metadata.json wandb-metadata.json
-2022-04-15 19:54:59,344 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log output.log
-2022-04-15 19:54:59,344 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/conda-environment.yaml conda-environment.yaml
-2022-04-15 19:54:59,344 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/wandb-summary.json wandb-summary.json
-2022-04-15 19:54:59,344 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/config.yaml config.yaml
-2022-04-15 19:54:59,344 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/diff.patch diff.patch
-2022-04-15 19:54:59,344 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/code/train_translation.py code/train_translation.py
-2022-04-15 19:54:59,345 INFO    SenderThread:6227 [file_pusher.py:finish():176] shutting down file pusher
-2022-04-15 19:54:59,345 INFO    SenderThread:6227 [file_pusher.py:join():181] waiting for file pusher
-2022-04-15 19:55:00,848 INFO    Thread-29 :6227 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/config.yaml
-2022-04-15 19:55:00,870 INFO    Thread-25 :6227 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/requirements.txt
-2022-04-15 19:55:00,895 INFO    Thread-28 :6227 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/wandb-summary.json
-2022-04-15 19:55:00,913 INFO    Thread-27 :6227 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/conda-environment.yaml
-2022-04-15 19:55:00,979 INFO    Thread-26 :6227 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:55:02,041 ERROR   wandb_internal:6227 [internal.py:wandb_internal():159] Thread HandlerThread:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
-    self._run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
-    record = self._input_record_q.get(timeout=1)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
-    res = self._recv_bytes()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
-    buf = self._recv_bytes(maxlength)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
-    buf = self._recv(4)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
-    raise EOFError
-EOFError
-2022-04-15 20:07:26,362 INFO    MainThread:6227 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
-2022-04-15 20:07:26,362 INFO    MainThread:6227 [wandb_run.py:_restore():1480] restore
-2022-04-15 20:07:26,757 INFO    MainThread:6227 [wandb_run.py:_restore():1480] restore
-2022-04-15 20:07:26,761 INFO    MainThread:6227 [internal.py:handle_exit():78] Internal process exited
diff --git a/wandb/run-20220415_193521-231emzap/logs/debug.log b/wandb/run-20220415_193521-231emzap/logs/debug.log
deleted file mode 100644
index 18e01c4..0000000
--- a/wandb/run-20220415_193521-231emzap/logs/debug.log
+++ /dev/null
@@ -1,97 +0,0 @@
-2022-04-15 19:35:21,618 INFO    MainThread:6227 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
-2022-04-15 19:35:21,618 INFO    MainThread:6227 [wandb_setup.py:_flush():69] setting login settings: {}
-2022-04-15 19:35:21,618 INFO    MainThread:6227 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/logs/debug.log
-2022-04-15 19:35:21,618 INFO    MainThread:6227 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/logs/debug-internal.log
-2022-04-15 19:35:21,618 INFO    MainThread:6227 [wandb_init.py:init():369] calling init triggers
-2022-04-15 19:35:21,618 INFO    MainThread:6227 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
-config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-15 19:35:21,618 INFO    MainThread:6227 [wandb_init.py:init():418] starting backend
-2022-04-15 19:35:21,630 INFO    MainThread:6227 [backend.py:ensure_launched():132] starting backend process...
-2022-04-15 19:35:21,641 INFO    MainThread:6227 [backend.py:ensure_launched():137] started backend process with pid: 0
-2022-04-15 19:35:21,654 INFO    wandb_internal:6227 [internal.py:wandb_internal():91] W&B internal server running at pid: 6227, started at: 2022-04-15 19:35:21.641638
-2022-04-15 19:35:21,661 INFO    MainThread:6227 [wandb_init.py:init():423] backend started and connected
-2022-04-15 19:35:21,663 INFO    MainThread:6227 [wandb_init.py:init():465] updated telemetry
-2022-04-15 19:35:21,677 INFO    MainThread:6227 [wandb_init.py:init():484] communicating current version
-2022-04-15 19:35:21,705 INFO    WriterThread:6227 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/run-231emzap.wandb
-2022-04-15 19:35:21,919 INFO    MainThread:6227 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-15 19:35:21,919 INFO    MainThread:6227 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-15 19:35:22,542 INFO    MainThread:6227 [wandb_init.py:init():522] starting run threads in backend
-2022-04-15 19:35:22,618 INFO    SenderThread:6227 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files
-2022-04-15 19:35:22,618 INFO    SenderThread:6227 [sender.py:_start_run_threads():707] run started: 231emzap with start time 1650031521
-2022-04-15 19:35:22,619 INFO    SenderThread:6227 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-15 19:35:23,604 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/code/train_translation.py
-2022-04-15 19:35:23,604 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/conda-environment.yaml
-2022-04-15 19:35:23,604 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/requirements.txt
-2022-04-15 19:35:23,604 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/diff.patch
-2022-04-15 19:35:23,604 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/wandb-summary.json
-2022-04-15 19:35:23,605 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/code
-2022-04-15 19:35:26,874 INFO    SenderThread:6227 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-15 19:35:26,875 INFO    SenderThread:6227 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-15 19:35:26,875 INFO    SenderThread:6227 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-15 19:35:26,909 INFO    MainThread:6227 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-15 19:35:26,913 INFO    MainThread:6227 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-15 19:35:26,913 INFO    MainThread:6227 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-15 19:35:26,967 INFO    MainThread:6227 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-15 19:35:26,967 INFO    MainThread:6227 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-15 19:35:26,968 INFO    MainThread:6227 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-15 19:35:27,603 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/conda-environment.yaml
-2022-04-15 19:35:27,603 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/wandb-metadata.json
-2022-04-15 19:35:27,604 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:35:28,289 INFO    Thread-14 :6227 [upload_job.py:push():133] Uploaded file /tmp/tmpit2mxldiwandb/3l2un8y7-wandb-metadata.json
-2022-04-15 19:35:28,309 INFO    Thread-16 :6227 [upload_job.py:push():133] Uploaded file /tmp/tmpit2mxldiwandb/2642x5u1-code/train_translation.py
-2022-04-15 19:35:29,248 INFO    Thread-18 :6227 [upload_job.py:push():133] Uploaded file /tmp/tmpit2mxldiwandb/3w1kgl5c-diff.patch
-2022-04-15 19:35:29,611 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/config.yaml
-2022-04-15 19:35:29,611 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:35:33,612 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:36:07,625 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:36:09,626 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:38:15,668 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:40:21,704 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:42:25,749 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:44:29,793 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:46:37,828 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:48:43,870 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:50:47,905 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:52:54,940 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:54:30,772 INFO    SenderThread:6227 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-15 19:54:30,966 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/wandb-summary.json
-2022-04-15 19:54:48,284 INFO    Thread-12 :6227 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:54:58,727 INFO    SenderThread:6227 [sender.py:finish():933] shutting down sender
-2022-04-15 19:54:58,727 INFO    SenderThread:6227 [dir_watcher.py:finish():282] shutting down directory watcher
-2022-04-15 19:54:59,092 INFO    WriterThread:6227 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/run-231emzap.wandb
-2022-04-15 19:54:59,343 INFO    SenderThread:6227 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files
-2022-04-15 19:54:59,343 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/requirements.txt requirements.txt
-2022-04-15 19:54:59,343 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/wandb-metadata.json wandb-metadata.json
-2022-04-15 19:54:59,344 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log output.log
-2022-04-15 19:54:59,344 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/conda-environment.yaml conda-environment.yaml
-2022-04-15 19:54:59,344 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/wandb-summary.json wandb-summary.json
-2022-04-15 19:54:59,344 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/config.yaml config.yaml
-2022-04-15 19:54:59,344 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/diff.patch diff.patch
-2022-04-15 19:54:59,344 INFO    SenderThread:6227 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/code/train_translation.py code/train_translation.py
-2022-04-15 19:54:59,345 INFO    SenderThread:6227 [file_pusher.py:finish():176] shutting down file pusher
-2022-04-15 19:54:59,345 INFO    SenderThread:6227 [file_pusher.py:join():181] waiting for file pusher
-2022-04-15 19:55:00,848 INFO    Thread-29 :6227 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/config.yaml
-2022-04-15 19:55:00,870 INFO    Thread-25 :6227 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/requirements.txt
-2022-04-15 19:55:00,895 INFO    Thread-28 :6227 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/wandb-summary.json
-2022-04-15 19:55:00,913 INFO    Thread-27 :6227 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/conda-environment.yaml
-2022-04-15 19:55:00,979 INFO    Thread-26 :6227 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_193521-231emzap/files/output.log
-2022-04-15 19:55:02,041 ERROR   wandb_internal:6227 [internal.py:wandb_internal():159] Thread HandlerThread:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
-    self._run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
-    record = self._input_record_q.get(timeout=1)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
-    res = self._recv_bytes()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
-    buf = self._recv_bytes(maxlength)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
-    buf = self._recv(4)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
-    raise EOFError
-EOFError
-2022-04-15 20:07:26,362 INFO    MainThread:6227 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
-2022-04-15 20:07:26,362 INFO    MainThread:6227 [wandb_run.py:_restore():1480] restore
-2022-04-15 20:07:26,757 INFO    MainThread:6227 [wandb_run.py:_restore():1480] restore
-2022-04-15 20:07:26,761 INFO    MainThread:6227 [internal.py:handle_exit():78] Internal process exited
diff --git a/wandb/run-20220415_193521-231emzap/run-231emzap.wandb b/wandb/run-20220415_193521-231emzap/run-231emzap.wandb
deleted file mode 100644
index ceb5081..0000000
Binary files a/wandb/run-20220415_193521-231emzap/run-231emzap.wandb and /dev/null differ
diff --git a/wandb/run-20220415_203240-1bwp8j0o/files/code/train_translation.py b/wandb/run-20220415_203240-1bwp8j0o/files/code/train_translation.py
deleted file mode 100644
index a7a253c..0000000
--- a/wandb/run-20220415_203240-1bwp8j0o/files/code/train_translation.py
+++ /dev/null
@@ -1,401 +0,0 @@
-import numpy as np
-from pathlib import Path
-import argparse
-import json
-import math
-import os
-import random
-import signal
-import subprocess
-import sys
-import time
-
-import torch
-from torch import nn, optim 
-from torch.nn import Transformer 
-import torchtext
-import t_dataset
-from t_dataset import  Translation_dataset_t
-from t_dataset import  MyCollate
-import translation_utils 
-from translation_utils import TokenEmbedding, PositionalEncoding 
-from translation_utils import create_mask
-from transformers import BertModel 
-from transformers import AutoTokenizer
-from torch import Tensor
-from torchtext.data.metrics import bleu_score
-from models import Translator
-from models import BarlowTwins
-
-import wandb 
-
-
-#import barlow
-os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
-os.environ['WANDB_START_METHOD'] = 'thread'
-os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-
-MANUAL_SEED = 4444
-
-random.seed(MANUAL_SEED)
-np.random.seed(MANUAL_SEED)
-torch.manual_seed(MANUAL_SEED)
-torch.backends.cudnn.deterministic = True
-
-
-parser = argparse.ArgumentParser(description = 'Translation') 
-
-# Training hyper-parameters: 
-parser.add_argument('--workers', default=4, type=int, metavar='N', 
-                    help='number of data loader workers') 
-parser.add_argument('--epochs', default=5, type=int, metavar='N',
-                    help='number of total epochs to run')
-parser.add_argument('--batch_size', default=4, type=int, metavar='n',
-                    help='mini-batch size')
-parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
-                    help='base learning rate')
-parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
-                    help='dropout for training translation transformer')
-parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
-                    help='weight decay')
-parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
-                    help='momentum for sgd')
-parser.add_argument('--clip', default=1, type=float, metavar='GC',
-                    help='Gradient Clipping')
-parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
-                    help='betas for Adam Optimizer')
-parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
-                    help='eps for Adam optimizer')
-parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
-                    help='loss function for translation')
-parser.add_argument('--optimizer', default='adam', type=str, metavar='OP',
-                    help='selecting optimizer')
-
-# Transformer parameters: 
-parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
-                    help='dimension of transformer encoder')
-parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                    help= 'number of heads in transformer') 
-parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                    help= 'dimension of feedforward layer in transformer encoder') 
-parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                   help='number of layers of transformer encoder') 
-parser.add_argument('--projector', default='768-256', type=str,
-                    metavar='MLP', help='projector MLP')
-
-# Tokenizer: 
-parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
-                metavar='T', help= 'tokenizer')
-parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
-                    help='Dimension of mbert output')
-# Paths: 
-parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
-                    metavar='DIR', help='path to checkpoint directory')
-
-# to load or barlow or not: 
-parser.add_argument('--load', default=0, type=int,
-                    metavar='DIR', help='to load barlow twins encoder or not')
-
-# calculate bleu: 
-parser.add_argument('--checkbleu', default=5 , type=int,
-                    metavar='BL', help='check bleu after these number of epochs')
-# train or test dataset
-parser.add_argument('--train', default=True , type=bool,
-                    metavar='T', help='selecting train set')
-
-parser.add_argument('--print_freq', default=5 , type=int,
-                    metavar='PF', help='frequency of printing and saving stats')
-
-parser.add_argument('--test_translation', default=0, type=int, 
-                    metavar='TT', help='testing translation_score')
-''' NOTE: 
-        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-'''
-
-args = parser.parse_args()
-# print(args.load)
-os.environ["TOKENIZERS_PARALLELISM"] = "true"
-
-def main(): 
-
-    # print("entered main")
-    args.ngpus_per_node = torch.cuda.device_count()
-    if 'SLURM_JOB_ID' in os.environ:
-        # single-node and multi-node distributed training on SLURM cluster
-        # requeue job on SLURM preemption
-        signal.signal(signal.SIGUSR1, handle_sigusr1)
-        signal.signal(signal.SIGTERM, handle_sigterm)
-        # find a common host name on all nodes
-        # assume scontrol returns hosts in the same order on all nodes
-        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
-        stdout = subprocess.check_output(cmd.split())
-        host_name = stdout.decode().splitlines()[0]
-        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
-        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
-        args.dist_url = f'tcp://{host_name}:58472'
-    else:
-        # single-node distributed training
-        args.rank = 0
-        args.dist_url = 'tcp://localhost:58472'
-        args.world_size = args.ngpus_per_node
-    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
-
-
-def main_worker(gpu, args):
-    
-    args.rank += gpu
-    torch.distributed.init_process_group(
-        backend='nccl', init_method=args.dist_url,
-        world_size=args.world_size, rank=args.rank)
-
-    if args.rank == 0:
-
-        wandb.init(config=args, project='translation_test')#############################################
-        wandb.config.update(args)
-        config = wandb.config
-    
-        # exit()
-        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
-        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
-        print(' '.join(sys.argv))
-        print(' '.join(sys.argv), file=stats_file)
-
-    torch.cuda.set_device(gpu)
-    torch.backends.cudnn.benchmark = True
-
-    dataset = Translation_dataset_t(train=args.train) 
-    src_vocab_size = dataset.de_vocab_size
-    trg_vocab_size = dataset.en_vocab_size
-    tokenizer = dataset.tokenizer  
-    pad_idx = tokenizer.pad_token_id
-    sos_idx = tokenizer.cls_token_id 
-    eos_idx = tokenizer.sep_token_id
-
-#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
-    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
-    # print(src_vocab_size, trg_vocab_size)
-    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
-    transformer = Transformer(d_model=args.dmodel, 
-                              nhead=args.nhead, 
-                              num_encoder_layers=args.nlayers, 
-                              num_decoder_layers = args.nlayers, 
-                              dim_feedforward=args.dfeedforward, 
-                              dropout=args.dropout)
-    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
-    # print(model.state_dict)
-#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
-
-    # args.load = False
-
-    if args.load == 1: 
-        # print(args.load)
-        # print('inside')
-        print('loading barlow model')
-        t_enc = model.transformer.encoder
-        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
-        ### note: lambd is just a placeholder
-        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
-                            map_location='cpu')
-        barlow.load_state_dict(ckpt['model'])
-        model.transformer.encoder = barlow.transformer_enc        
-        model.mbert = barlow.mbert
-    '''
-    to_do: 
-    if post_train: 
-        torch.load(model.states_dict)
-        model.transformer.encoder = model_barlow
-
-    '''
-#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
-
-    param_weights = []
-    param_biases = []
-    for param in model.parameters():
-        if param.ndim == 1:
-            param_biases.append(param)
-        else:
-            param_weights.append(param)
-    parameters = [{'params': param_weights}, {'params': param_biases}]
-    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
-
-###########################################################
-    if args.optimizer == 'adam':
-        optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
-    else: 
-        optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) 
-    
-    if args.loss_fn == 'cross_entropy': 
-        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
-##############################################################
-
-    start_epoch = 0 
-
-    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
-
-    assert args.batch_size % args.world_size == 0
-    per_device_batch_size = args.batch_size // args.world_size
-    id2bert_dict = dataset.id2bert_dict
-    ###############################
-    loader = torch.utils.data.DataLoader(
-         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-   
-    test_loader = torch.utils.data.DataLoader(
-         dataset, batch_size=1, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-    #############################
-    start_time = time.time()
-
-
-    if not args.test_translation: 
-
-        for epoch in range(start_epoch, args.epochs):
-            sampler.set_epoch(epoch)
-            epoch_loss = 0 
-            t = 0 
-            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
-                src = sent[0].cuda(gpu, non_blocking=True)
-                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
-                tgt_out = sent[3].cuda(gpu, non_blocking=True)
-                
-                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
-                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
-                
-                optimizer.zero_grad()
-
-                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
-                loss.backward()
-
-                optimizer.step()
-                # losses += loss.item()
-                
-                # wandb.log({'iter_loss': loss})
-                epoch_loss += loss.item()
-                t += 1 
-                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-                
-                if step % args.print_freq == 0:
-                    if args.rank == 0:
-                        stats = dict(epoch=epoch, step=step,
-                                    loss=loss.item(),
-                                    time=int(time.time() - start_time))
-                        print(json.dumps(stats))
-                        print(json.dumps(stats), file=stats_file)
-            if args.rank == 0:
-
-                wandb.log({"epoch_loss":epoch_loss/t})
-                # save checkpoint
-                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
-                            optimizer=optimizer.state_dict())
-                # print(model.state_dict)
-                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
-                print('translation model saved in', args.checkpoint_dir)
-            
-    ##############################################################
-            if args.rank == 0: 
-                if epoch%args.checkbleu ==0 : 
-
-                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                    wandb.log({'bleu_score': bleu_score}) 
-    #            print(bleu_score(predicted, target))
-    ##############################################################
-    #        if epoch%1 ==0 : 
-    #            torch.save(model.module.state_dict(),
-    #                   'path.pth')
-    #            print("Model is saved")
-            # if args.rank == 0:
-            #     # save checkpoint
-            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
-            #                  optimizer=optimizer.state_dict())
-            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
-            #     print('saved translation model in', args.checkpoint_dir)
-        wandb.finish()
-            
-    else: 
-
-        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-        print('test_bleu_score', bleu_score)
-        if args.rank == 0: 
-            wandb.log({'bleu_score': bleu_score})
-
-
-def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
-
-    model.eval()
-    predicted=[]
-    target=[]
-            
-    for i in test_loader: 
-        src = i[0].cuda(gpu, non_blocking=True)
-#        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-        tgt_out = i[3].cuda(gpu, non_blocking=True)
-        num_tokens = src.shape[0]
-
-        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
-        out = translate(model, src, tokenizer, src_mask, id2bert, gpu)
-        predicted.append(out)
-        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-        print(out)
-        print(tokenizer.convert_ids_to_tokens(tgt_out))
-                
-        try: 
-            bleu_score(predicted, target)
-        except: 
-            predicted.pop()
-            target.pop()
-            
-        bleu = bleu_score(predicted, target)
-
-    return bleu
-
-'''
-todo: 
-    BLEU score
-'''
-
-# function to generate output sequence using greedy algorithm 
-def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
-    src = src
-    src_mask = src_mask
-
-    memory = model.module.encode(src, src_mask)
-    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
-    for i in range(max_len-1):
-        memory = memory
-        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
-                    .type(torch.bool)).cuda(gpu, non_blocking=True)
-        out = model.module.decode(ys, memory, tgt_mask)
-        out = out.transpose(0, 1)
-        prob = model.module.generator(out[:, -1])
-        _, next_word = torch.max(prob, dim=1)
-        next_word = next_word.item()
-
-        ys = torch.cat([ys,
-                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
-        if next_word == eos_idx:
-            break
-    return ys
-
-
-# actual function to translate input sentence into target language
-def translate(model: torch.nn.Module, 
-        src: torch.tensor, 
-        tokenizer,src_mask, id2bert, gpu):
-    model.eval()
-    
-    num_tokens = src.shape[0]
-    
-    
-    tgt_tokens = greedy_decode(
-        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-    
-#    for i in len(tgt_tokens): 
-#        tgt_tokens[i] = id2bert[tgt_tokens[i]]
-#    print(tgt_tokens)
-
-    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
-
-
-if __name__ == '__main__': 
-    main()
-    wandb.finish()
diff --git a/wandb/run-20220415_203240-1bwp8j0o/files/conda-environment.yaml b/wandb/run-20220415_203240-1bwp8j0o/files/conda-environment.yaml
deleted file mode 100644
index fd74d2b..0000000
--- a/wandb/run-20220415_203240-1bwp8j0o/files/conda-environment.yaml
+++ /dev/null
@@ -1,158 +0,0 @@
-name: ectc
-channels:
-  - pytorch
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=4.5=1_gnu
-  - blas=1.0=mkl
-  - brotlipy=0.7.0=py37h27cfd23_1003
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2022.3.18=h06a4308_0
-  - certifi=2021.10.8=py37h06a4308_2
-  - cffi=1.15.0=py37hd667e15_1
-  - cryptography=36.0.0=py37h9ce1e76_0
-  - cudatoolkit=11.3.1=h2bc3f7f_2
-  - ffmpeg=4.3=hf484d3e_0
-  - freetype=2.11.0=h70c0345_0
-  - giflib=5.2.1=h7b6447c_0
-  - gmp=6.2.1=h2531618_2
-  - gnutls=3.6.15=he1e5248_0
-  - idna=3.3=pyhd3eb1b0_0
-  - intel-openmp=2021.4.0=h06a4308_3561
-  - jpeg=9d=h7f8727e_0
-  - lame=3.100=h7b6447c_0
-  - lcms2=2.12=h3be6417_0
-  - ld_impl_linux-64=2.35.1=h7274673_9
-  - libffi=3.3=he6710b0_2
-  - libgcc-ng=9.3.0=h5101ec6_17
-  - libgomp=9.3.0=h5101ec6_17
-  - libiconv=1.15=h63c8f33_5
-  - libidn2=2.3.2=h7f8727e_0
-  - libpng=1.6.37=hbc83047_0
-  - libstdcxx-ng=9.3.0=hd4cf53a_17
-  - libtasn1=4.16.0=h27cfd23_0
-  - libtiff=4.2.0=h85742a9_0
-  - libunistring=0.9.10=h27cfd23_0
-  - libuv=1.40.0=h7b6447c_0
-  - libwebp=1.2.2=h55f646e_0
-  - libwebp-base=1.2.2=h7f8727e_0
-  - lz4-c=1.9.3=h295c915_1
-  - mkl=2021.4.0=h06a4308_640
-  - mkl-service=2.4.0=py37h7f8727e_0
-  - mkl_fft=1.3.1=py37hd3c417c_0
-  - mkl_random=1.2.2=py37h51133e4_0
-  - ncurses=6.3=h7f8727e_2
-  - nettle=3.7.3=hbbd107a_1
-  - numpy-base=1.21.2=py37h79a1101_0
-  - openh264=2.1.1=h4ff587b_0
-  - openssl=1.1.1n=h7f8727e_0
-  - pip=21.2.2=py37h06a4308_0
-  - pycparser=2.21=pyhd3eb1b0_0
-  - pyopenssl=22.0.0=pyhd3eb1b0_0
-  - pysocks=1.7.1=py37_1
-  - python=3.7.11=h12debd9_0
-  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
-  - pytorch-mutex=1.0=cuda
-  - readline=8.1.2=h7f8727e_1
-  - requests=2.27.1=pyhd3eb1b0_0
-  - setuptools=58.0.4=py37h06a4308_0
-  - six=1.16.0=pyhd3eb1b0_1
-  - sqlite=3.38.0=hc218d9a_0
-  - tk=8.6.11=h1ccaba5_0
-  - torchaudio=0.11.0=py37_cu113
-  - typing_extensions=4.1.1=pyh06a4308_0
-  - wheel=0.37.1=pyhd3eb1b0_0
-  - xz=5.2.5=h7b6447c_0
-  - zlib=1.2.11=h7f8727e_4
-  - zstd=1.4.9=haebb681_0
-  - pip:
-    - aiohttp==3.8.1
-    - aiosignal==1.2.0
-    - antlr4-python3-runtime==4.8
-    - async-timeout==4.0.2
-    - asynctest==0.13.0
-    - attrs==21.4.0
-    - backcall==0.2.0
-    - bitarray==2.4.1
-    - blessings==1.7
-    - charset-normalizer==2.0.12
-    - click==8.0.4
-    - colorama==0.4.4
-    - configparser==5.2.0
-    - cython==0.29.28
-    - datasets==1.16.1
-    - debugpy==1.6.0
-    - decorator==5.1.1
-    - dill==0.3.4
-    - docker-pycreds==0.4.0
-    - entrypoints==0.4
-    - fastbpe==0.1.0
-    - filelock==3.6.0
-    - frozenlist==1.3.0
-    - fsspec==2022.2.0
-    - gitdb==4.0.9
-    - gitpython==3.1.27
-    - gpustat==0.6.0
-    - huggingface-hub==0.4.0
-    - hydra-core==1.0.7
-    - importlib-metadata==4.11.3
-    - importlib-resources==5.6.0
-    - ipykernel==6.12.1
-    - ipython==7.32.0
-    - jedi==0.18.1
-    - joblib==1.1.0
-    - jupyter-client==7.2.2
-    - jupyter-core==4.9.2
-    - matplotlib-inline==0.1.3
-    - mock==4.0.3
-    - multidict==6.0.2
-    - multiprocess==0.70.12.2
-    - nest-asyncio==1.5.5
-    - numpy==1.21.5
-    - nvidia-ml-py3==7.352.0
-    - omegaconf==2.0.6
-    - packaging==21.3
-    - pandas==1.3.5
-    - parso==0.8.3
-    - pathtools==0.1.2
-    - pexpect==4.8.0
-    - pickleshare==0.7.5
-    - pillow==9.0.1
-    - portalocker==2.4.0
-    - promise==2.3
-    - prompt-toolkit==3.0.29
-    - protobuf==3.19.4
-    - psutil==5.9.0
-    - ptyprocess==0.7.0
-    - pyarrow==7.0.0
-    - pygments==2.11.2
-    - pyparsing==3.0.7
-    - python-dateutil==2.8.2
-    - pytz==2022.1
-    - pyyaml==6.0
-    - pyzmq==22.3.0
-    - regex==2022.3.15
-    - sacrebleu==2.0.0
-    - sacremoses==0.0.49
-    - sentry-sdk==1.5.8
-    - shortuuid==1.0.8
-    - smmap==5.0.0
-    - subprocess32==3.5.4
-    - subword-nmt==0.3.8
-    - tabulate==0.8.9
-    - tokenizers==0.10.3
-    - torch==1.11.0
-    - torchtext==0.12.0
-    - torchvision==0.9.1
-    - tornado==6.1
-    - tqdm==4.63.1
-    - traitlets==5.1.1
-    - transformers==4.14.1
-    - urllib3==1.26.9
-    - wandb==0.10.31
-    - wcwidth==0.2.5
-    - xxhash==3.0.0
-    - yarl==1.7.2
-    - zipp==3.7.0
-prefix: /home/ivlabs/miniconda3/envs/ectc
diff --git a/wandb/run-20220415_203240-1bwp8j0o/files/config.yaml b/wandb/run-20220415_203240-1bwp8j0o/files/config.yaml
deleted file mode 100644
index 4ed8c75..0000000
--- a/wandb/run-20220415_203240-1bwp8j0o/files/config.yaml
+++ /dev/null
@@ -1,110 +0,0 @@
-wandb_version: 1
-
-_wandb:
-  desc: null
-  value:
-    cli_version: 0.10.31
-    code_path: code/train_translation.py
-    framework: huggingface
-    huggingface_version: 4.14.1
-    is_jupyter_run: false
-    is_kaggle_kernel: false
-    python_version: 3.7.11
-    t:
-      1:
-      - 1
-      - 11
-      4: 3.7.11
-      5: 0.10.31
-      6: 4.14.1
-      8:
-      - 8
-batch_size:
-  desc: null
-  value: 4
-betas:
-  desc: null
-  value:
-  - 0.9
-  - 0.98
-checkbleu:
-  desc: null
-  value: 5
-checkpoint_dir:
-  desc: null
-  value: checkpoint
-clip:
-  desc: null
-  value: 1
-dfeedforward:
-  desc: null
-  value: 200
-dist_url:
-  desc: null
-  value: tcp://localhost:58472
-dmodel:
-  desc: null
-  value: 768
-dropout:
-  desc: null
-  value: 0.01
-epochs:
-  desc: null
-  value: 5
-eps:
-  desc: null
-  value: 1.0e-09
-learning_rate:
-  desc: null
-  value: 0.2
-load:
-  desc: null
-  value: 0
-loss_fn:
-  desc: null
-  value: cross_entropy
-mbert_out_size:
-  desc: null
-  value: 768
-momentum:
-  desc: null
-  value: 0.9
-ngpus_per_node:
-  desc: null
-  value: 2
-nhead:
-  desc: null
-  value: 4
-nlayers:
-  desc: null
-  value: 3
-optimizer:
-  desc: null
-  value: adam
-print_freq:
-  desc: null
-  value: 5
-projector:
-  desc: null
-  value: 768-256
-rank:
-  desc: null
-  value: 0
-test_translation:
-  desc: null
-  value: 0
-tokenizer:
-  desc: null
-  value: bert-base-multilingual-uncased
-train:
-  desc: null
-  value: true
-weight_decay:
-  desc: null
-  value: 1.0e-06
-workers:
-  desc: null
-  value: 4
-world_size:
-  desc: null
-  value: 2
diff --git a/wandb/run-20220415_203240-1bwp8j0o/files/diff.patch b/wandb/run-20220415_203240-1bwp8j0o/files/diff.patch
deleted file mode 100644
index 8d75a67..0000000
--- a/wandb/run-20220415_203240-1bwp8j0o/files/diff.patch
+++ /dev/null
@@ -1,30655 +0,0 @@
-diff --git a/__pycache__/barlow_utils.cpython-37.pyc b/__pycache__/barlow_utils.cpython-37.pyc
-index 3c0d4fe..b13b62f 100644
-Binary files a/__pycache__/barlow_utils.cpython-37.pyc and b/__pycache__/barlow_utils.cpython-37.pyc differ
-diff --git a/__pycache__/models.cpython-37.pyc b/__pycache__/models.cpython-37.pyc
-index 3bbb9de..acc1737 100644
-Binary files a/__pycache__/models.cpython-37.pyc and b/__pycache__/models.cpython-37.pyc differ
-diff --git a/__pycache__/t_dataset.cpython-37.pyc b/__pycache__/t_dataset.cpython-37.pyc
-index 2650733..c4b566b 100644
-Binary files a/__pycache__/t_dataset.cpython-37.pyc and b/__pycache__/t_dataset.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-37.pyc b/__pycache__/translation_utils.cpython-37.pyc
-index 60c9eda..12c22a5 100644
-Binary files a/__pycache__/translation_utils.cpython-37.pyc and b/__pycache__/translation_utils.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-38.pyc b/__pycache__/translation_utils.cpython-38.pyc
-index 061d0e7..a1e7877 100644
-Binary files a/__pycache__/translation_utils.cpython-38.pyc and b/__pycache__/translation_utils.cpython-38.pyc differ
-diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
-index 884dd9c..296d49a 100644
---- a/checkpoint/stats.txt
-+++ b/checkpoint/stats.txt
-@@ -833,3 +833,71 @@ train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 -
- {"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
- {"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
- {"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 4}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 5}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 5}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 6}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 7}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 7}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 8}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 8}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 9}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 8}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 65}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 178}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 15}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 72}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 128}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 183}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 239}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 295}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 351}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 407}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 463}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 19}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 104}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 188}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 355}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 606}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 690}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.121065616607666, "time": 9}
-+{"epoch": 0, "step": 5, "loss": 97.44178771972656, "time": 10}
-+{"epoch": 0, "step": 10, "loss": 168.33328247070312, "time": 12}
-+{"epoch": 0, "step": 15, "loss": 133.17933654785156, "time": 12}
-+{"epoch": 0, "step": 20, "loss": 112.3768539428711, "time": 13}
-+{"epoch": 0, "step": 25, "loss": 120.29653930664062, "time": 14}
-+{"epoch": 0, "step": 30, "loss": 119.97941589355469, "time": 15}
-+{"epoch": 0, "step": 35, "loss": 86.40515899658203, "time": 16}
-+{"epoch": 0, "step": 40, "loss": 70.5906982421875, "time": 17}
-+train_translation.py
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 28}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 155}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 281}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 405}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 530}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 657}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 783}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 908}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 1033}
-diff --git a/t_dataset.py b/t_dataset.py
-index c7ab181..53d5caa 100644
---- a/t_dataset.py
-+++ b/t_dataset.py
-@@ -20,19 +20,19 @@ class Translation_dataset_t(Dataset):
-             split = "train" 
-         else: 
-             split = "test" 
--        self.dataset = load_dataset('wmt14', "de-en", split=split) 
-+        self.dataset = load_dataset('opus_rf', "de-en", split=split) 
-         self.de_list = []
-         self.en_list = []
- #        self.tokenizer = tokenizer
-         self.tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-uncased')
--        dataset = load_dataset('opus_rf', 'de-en', split='train')
-         en_list_2 = []
--        for n, i in enumerate(dataset): 
-+        for n, i in enumerate(self.dataset): 
-             en_list_2.append(i['translation']['en'].lower())
- 
-         a1 = list(self.tokenizer(en_list_2, padding=True, return_tensors='pt')['input_ids'])
-         self.en_vocab, self.en_vocab_size = vocab(a1)
-         self.bert2id_dict = translation_utils.bert2id(self.en_vocab)
-+        self.id2bert_dict = translation_utils.id2bert(self.en_vocab)
-         
-         for i in self.dataset: 
-             self.de_list.append(self.tokenizer(i['translation']['de'].lower(), 
-diff --git a/train_translation.py b/train_translation.py
-index eea074a..a7a253c 100644
---- a/train_translation.py
-+++ b/train_translation.py
-@@ -33,6 +33,7 @@ import wandb
- #import barlow
- os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
- os.environ['WANDB_START_METHOD'] = 'thread'
-+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
- 
- MANUAL_SEED = 4444
- 
-@@ -75,9 +76,9 @@ parser.add_argument('--dmodel', default=768, type=int, metavar='T',
-                     help='dimension of transformer encoder')
- parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                     help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=500, type=int, metavar='F', 
-+parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                     help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=8, type=int, metavar= 'N', 
-+parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                    help='number of layers of transformer encoder') 
- parser.add_argument('--projector', default='768-256', type=str,
-                     metavar='MLP', help='projector MLP')
-@@ -233,6 +234,7 @@ def main_worker(gpu, args):
- 
-     assert args.batch_size % args.world_size == 0
-     per_device_batch_size = args.batch_size // args.world_size
-+    id2bert_dict = dataset.id2bert_dict
-     ###############################
-     loader = torch.utils.data.DataLoader(
-          dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-@@ -293,7 +295,7 @@ def main_worker(gpu, args):
-             if args.rank == 0: 
-                 if epoch%args.checkbleu ==0 : 
- 
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
-+                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                     wandb.log({'bleu_score': bleu_score}) 
-     #            print(bleu_score(predicted, target))
-     ##############################################################
-@@ -311,13 +313,13 @@ def main_worker(gpu, args):
-             
-     else: 
- 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
-+        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-         print('test_bleu_score', bleu_score)
-         if args.rank == 0: 
-             wandb.log({'bleu_score': bleu_score})
- 
- 
--def checkbleu(model, tokenizer, test_loader, gpu): 
-+def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
- 
-     model.eval()
-     predicted=[]
-@@ -325,13 +327,16 @@ def checkbleu(model, tokenizer, test_loader, gpu):
-             
-     for i in test_loader: 
-         src = i[0].cuda(gpu, non_blocking=True)
-+#        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-         tgt_out = i[3].cuda(gpu, non_blocking=True)
-         num_tokens = src.shape[0]
- 
-         src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
-+        out = translate(model, src, tokenizer, src_mask, id2bert, gpu)
-         predicted.append(out)
-         target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-+        print(out)
-+        print(tokenizer.convert_ids_to_tokens(tgt_out))
-                 
-         try: 
-             bleu_score(predicted, target)
-@@ -375,7 +380,7 @@ def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
- # actual function to translate input sentence into target language
- def translate(model: torch.nn.Module, 
-         src: torch.tensor, 
--        tokenizer,src_mask, gpu):
-+        tokenizer,src_mask, id2bert, gpu):
-     model.eval()
-     
-     num_tokens = src.shape[0]
-@@ -383,6 +388,11 @@ def translate(model: torch.nn.Module,
-     
-     tgt_tokens = greedy_decode(
-         model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-+    
-+#    for i in len(tgt_tokens): 
-+#        tgt_tokens[i] = id2bert[tgt_tokens[i]]
-+#    print(tgt_tokens)
-+
-     return tokenizer.convert_ids_to_tokens(tgt_tokens) 
- 
- 
-diff --git a/translation_dataset.py b/translation_dataset.py
-index 274c2f3..82270c6 100644
---- a/translation_dataset.py
-+++ b/translation_dataset.py
-@@ -11,7 +11,7 @@ class Translation_dataset(Dataset):
-     
-     def __init__(self):
-       
--        self.dataset = load_dataset('wmt14', "de-en", split="train") 
-+        self.dataset = load_dataset('opus_rf', "de-en", split="train") 
-         self.de_list = []
-         self.en_list = []
- 
-diff --git a/translation_utils.py b/translation_utils.py
-index 6c66f53..4b3b830 100644
---- a/translation_utils.py
-+++ b/translation_utils.py
-@@ -31,6 +31,13 @@ def bert2id(de_list: set):
-     
-     return label_dict
- 
-+def id2bert(de_list: set): 
-+    label_dict = {}
-+    for n, i in enumerate(de_list): 
-+        label_dict[n] = i
-+    
-+    return label_dict
-+
- def generate_square_subsequent_mask(sz):
-     mask = (torch.triu(torch.ones((sz, sz))) == 1).transpose(0, 1)
-     mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
-@@ -81,10 +88,10 @@ class TokenEmbedding(nn.Module):
-         super(TokenEmbedding, self).__init__()
-         # self.embedding = nn.Embedding(vocab_size, emb_size)
-         self.embedding = mbert
--#         for param in self.embedding.parameters():
--#             param.requires_grad = False
--#         for param in self.embedding.pooler.parameters():
--#             param.requires_grad = True
-+        for param in self.embedding.parameters():
-+            param.requires_grad = False
-+        for param in self.embedding.pooler.parameters():
-+            param.requires_grad = True
-         self.emb_size = emb_size
- 
-     def forward(self, tokens: torch.tensor):
-diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
-index 6163657..b09d5c6 120000
---- a/wandb/debug-internal.log
-+++ b/wandb/debug-internal.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug-internal.log
-\ No newline at end of file
-+run-20220415_203240-1bwp8j0o/logs/debug-internal.log
-\ No newline at end of file
-diff --git a/wandb/debug.log b/wandb/debug.log
-index 7d0f5dd..65fde58 120000
---- a/wandb/debug.log
-+++ b/wandb/debug.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug.log
-\ No newline at end of file
-+run-20220415_203240-1bwp8j0o/logs/debug.log
-\ No newline at end of file
-diff --git a/wandb/latest-run b/wandb/latest-run
-index f11d588..a7a7812 120000
---- a/wandb/latest-run
-+++ b/wandb/latest-run
-@@ -1 +1 @@
--run-20220409_182749-paufev36
-\ No newline at end of file
-+run-20220415_203240-1bwp8j0o
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py b/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-deleted file mode 100644
-index 9236ace..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-+++ /dev/null
-@@ -1,350 +0,0 @@
--# Copyright (c) Facebook, Inc. and its affiliates.
--# All rights reserved.
--#
--# This source code is licensed under the license found in the
--# LICENSE file in the root directory of this source tree.
--
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--from translation_dataset import Translation_dataset
--from translation_dataset import MyCollate
--from transformers import BertModel
--from transformers import AutoTokenizer
--from torch import nn, optim
--import torch
--from t_dataset import Translation_dataset_t
--from torch.nn import Transformer
--from models import BarlowTwins
--from models import Translator
--from barlow_utils import off_diagonal 
--import wandb 
--#from _config import Config 
--#config = Config.config
--
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--#setting random seeds
--SEED = 4444
--
--random.seed(SEED)
--np.random.seed(SEED)
--torch.manual_seed(SEED)
--torch.cuda.manual_seed(SEED)
--torch.backends.cudnn.deterministic = True
--
--
--
--
--parser = argparse.ArgumentParser(description='Barlow Twins Training')
--# parser.add_batch_sizeargument('data', type=Path, metavar='DIR',
--#                     help='path to dataset')
--
--
--
--# Training parameters: 
--parser.add_argument('--workers', default=20, type=int, metavar='N',
--                    help='number of data loader workers')
--parser.add_argument('--epochs', default=2, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=64, type=int, metavar='N',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate-weights', default=0.2, type=float, metavar='LR',
--                    help='base learning rate for weights')
--parser.add_argument('--learning-rate-biases', default=0.0048, type=float, metavar='LR',
--                 help='base learning rate for biases and batch norm parameters')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--lambd', default=0.0051, type=float, metavar='L',
--                    help='weight on off-diagonal terms')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--
--# Model parameters:
--parser.add_argument('--projector', default='768-768', type=str,
--                    metavar='MLP', help='projector MLP')
--parser.add_argument('--print-freq', default=100, type=int, metavar='N',
--                    help='print frequency')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=3, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--dropout', default=0.0051, type=float, metavar= 'D', 
--                   help='dropout in transformer') 
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-cased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint-dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--parser.add_argument('--load', default=1, type=int,
--                    metavar='LO', help='load weights from translation model')
--
--args = parser.parse_args()
--
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main():
--
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--        wandb.init(config=args)#############################################
--        # wandb.config.update(args)
--        config = wandb.config
--        # print(args.lambd, config.lambd)
--        # wandb.finish()
--        # exibatch_sizet()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=False)
--    t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    mbert = BertModel.from_pretrained(args.tokenizer)
--    model = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=args.lambd).cuda(gpu)
--    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--    optimizer = LARS(parameters, lr=0, weight_decay=args.weight_decay,
--                     weight_decay_filter=True,
--                     lars_adaptation_filter=True)
--    # optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
--
--    # automatically resume from checkpoint if it exists
--    # if (args.checkpoint_dir / 'checkpoint.pth').is_file():
--    #     ckpt = torch.load(args.checkpoint_dir / 'checkpoint.pth',
--    #                       map_location='cpu')
--    #     start_epoch = ckpt['epoch']
--    #     # print("model=",model)
--    #     # print("ckpt=",ckpt['model'])
--    #     model.load_state_dict(ckpt['model'])
--    #     optimizer.load_state_dict(ckpt['optimizer'])
--    # else:
--
--    trans_dataset = Translation_dataset_t(train=True)
--    src_vocab_size = trans_dataset.de_vocab_size 
--    tgt_vocab_size = trans_dataset.en_vocab_size
--    tokenizer = trans_dataset.tokenizer
--    transformer = Transformer(d_model=args.dmodel, 
--                                   nhead=args.nhead, 
--                                   num_encoder_layers=args.nlayers,
--                                   num_decoder_layers=args.nlayers, 
--                                   dim_feedforward=args.dfeedforward, 
--                                   dropout=args.dropout)
--    print(args.batch_size)
--    translation_model = Translator(mbert, 
--            transformer,
--            tgt_vocab_size=tgt_vocab_size,
--            emb_size=args.mbert_out_size)
--    
--    if args.load == 1 : 
--        print('loading translation model')
--        ckpt = torch.load(args.checkpoint_dir / 'translation_checkpoint.pth') #,map_location='cpu')
--        translation_model.load_state_dict(ckpt['model'])
--        model.transformer_enc = translation_model.transformer.encoder
--        model.mbert = translation_model.tok_emb.embedding
--        
--    start_epoch = 0
--
--
--    ################################
--    # dataset = torchvision.datasets.ImageFolder(args.data / 'train', Transform())
--    # sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--    ###############################
--
--    dataset = Translation_dataset()
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    #############################
--    start_time = time.time()
--    scaler = torch.cuda.amp.GradScaler()
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            y1 = sent[0].cuda(gpu, non_blocking=True)
--            y2 = sent[1].cuda(gpu, non_blocking=True)
--            adjust_learning_rate(args, optimizer, loader, step)
--            optimizer.zero_grad()
--            with torch.cuda.amp.autocast(): 
--                _, loss = model.forward(y1, y2)
--                wandb.log({'iter_loss':loss})
--#               print(loss.item())
--                epoch_loss += loss.item()
--            scaler.scale(loss).backward()
--            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
--            scaler.step(optimizer)
--            scaler.update()
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 lr_weights=optimizer.param_groups[0]['lr'],
--                                 lr_biases=optimizer.param_groups[1]['lr'],
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.state_dict(),
--                         optimizer=optimizer.state_dict())
--            torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--            print('barlow model saved in', args.checkpoint_dir)
--            for sent in test_loader: 
--                y1 = sent[0].cuda(gpu, non_blocking=True)
--                y2 = sent[1].cuda(gpu, non_blocking=True)
--                model.eval()
--                c, _ = model(y1, y2)
--                xlabels = tokenizer.convert_ids_to_tokens(y2)
--                ylabels = tokenizer.convert_ids_to_tokens(y1)
--    wandb.finish()
--#    if args.rank == 0:
--#        save final model
--#        torch.save(model.module.state_dict(),
--#                    args.checkpoint_dir / 'translation.pth')
--
--
--def adjust_learning_rate(args, optimizer, loader, step):
--    max_steps = args.epochs * len(loader)
--    warmup_steps = 10 * len(loader)
--    base_lr = args.batch_size / 256
--    if step < warmup_steps:
--        lr = base_lr * step / warmup_steps
--    else:
--        step -= warmup_steps
--        max_steps -= warmup_steps
--        q = 0.5 * (1 + math.cos(math.pi * step / max_steps))
--        end_lr = base_lr * 0.001
--        lr = base_lr * q + end_lr * (1 - q)
--    optimizer.param_groups[0]['lr'] = lr * args.learning_rate_weights
--    optimizer.param_groups[1]['lr'] = lr * args.learning_rate_biases
--
--
--def handle_sigusr1(signum, frame):
--    os.system(f'scontrol requeue {os.getenv("SLURM_JOB_ID")}')
--    exit()
--
--
--def handle_sigterm(signum, frame):
--    pass
--
--
--class LARS(optim.Optimizer):
--    def __init__(self, params, lr, weight_decay=0, momentum=0.9, eta=0.001,
--                 weight_decay_filter=False, lars_adaptation_filter=False):
--        defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum,
--                        eta=eta, weight_decay_filter=weight_decay_filter,
--                        lars_adaptation_filter=lars_adaptation_filter)
--        super().__init__(params, defaults)
--
--
--    def exclude_bias_and_norm(self, p):
--        return p.ndim == 1
--
--    @torch.no_grad()
--    def step(self):
--        for g in self.param_groups:
--            for p in g['params']:
--                dp = p.grad
--
--                if dp is None:
--                    continue
--
--                if not g['weight_decay_filter'] or not self.exclude_bias_and_norm(p):
--                    dp = dp.add(p, alpha=g['weight_decay'])
--
--                if not g['lars_adaptation_filter'] or not self.exclude_bias_and_norm(p):
--                    param_norm = torch.norm(p)
--                    update_norm = torch.norm(dp)
--                    one = torch.ones_like(param_norm)
--                    q = torch.where(param_norm > 0.,
--                                    torch.where(update_norm > 0,
--                                                (g['eta'] * param_norm / update_norm), one), one)
--                    dp = dp.mul(q)
--
--                param_state = self.state[p]
--                if 'mu' not in param_state:
--                    param_state['mu'] = torch.zeros_like(p)
--                mu = param_state['mu']
--                mu.mul_(g['momentum']).add_(dp)
--
--                p.add_(mu, alpha=-g['lr'])
--
--
--if __name__ == '__main__':
--    try:  
--      main()
--    except KeyboardInterrupt:
--      print('Interrupted')
--      wandb.finish()
--      try:
--          sys.exit(0)
--      except SystemExit:
--          os._exit(0)
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml b/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/config.yaml b/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-deleted file mode 100644
-index 147470d..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-+++ /dev/null
-@@ -1,90 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/barlow.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.0051
--epochs:
--  desc: null
--  value: 2
--lambd:
--  desc: null
--  value: 0.0051
--learning_rate_biases:
--  desc: null
--  value: 0.0048
--learning_rate_weights:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 3
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 100
--projector:
--  desc: null
--  value: 768-768
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-cased
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 20
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/output.log b/wandb/run-20220406_171518-s7zesus8/files/output.log
-deleted file mode 100644
-index 847ffbb..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/output.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--
--barlow.py --load 0
--Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Error in sys.excepthook:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 47, in getlines
--    return updatecache(filename, module_globals)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 136, in updatecache
--    with tokenize.open(fullname) as fp:
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/tokenize.py", line 447, in open
--    buffer = _builtin_open(filename, 'rb')
--KeyboardInterrupt
--Original exception was:
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt b/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-deleted file mode 100644
-index 5f93d29..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,21 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-06T11:45:20.215162",
--    "startedAt": "2022-04-06T11:45:18.613420",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_enhancement/barlow.py",
--    "codePath": "barlow.py",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log b/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-deleted file mode 100644
-index 0630656..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-+++ /dev/null
-@@ -1,91 +0,0 @@
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,622 DEBUG   MainThread:16786 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: check_version
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send():179] send: header
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: check_version
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:19,155 DEBUG   SenderThread:16786 [sender.py:send():179] send: run
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 DEBUG   SenderThread:16786 [sender.py:send():179] send: summary
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:20,211 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: run_start
--2022-04-06 17:15:20,214 DEBUG   HandlerThread:16786 [meta.py:__init__():39] meta init
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:__init__():53] meta init done
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:probe():210] probe
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():89] save code
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():110] save code done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():57] save pip
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():71] save pip done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_conda():78] save conda
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,240 DEBUG   HandlerThread:16786 [meta.py:_save_conda():86] save conda done
--2022-04-06 17:15:22,241 DEBUG   HandlerThread:16786 [meta.py:probe():252] probe done
--2022-04-06 17:15:22,255 DEBUG   SenderThread:16786 [sender.py:send():179] send: files
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-06 17:15:22,262 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: stop_status
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug.log b/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-deleted file mode 100644
-index 9769176..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:init():369] calling init triggers
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 20, 'epochs': 2, 'batch_size': 64, 'learning_rate_weights': 0.2, 'learning_rate_biases': 0.0048, 'weight_decay': 1e-06, 'lambd': 0.0051, 'clip': 1, 'projector': '768-768', 'print_freq': 100, 'dmodel': 768, 'nhead': 3, 'dfeedforward': 256, 'nlayers': 3, 'dropout': 0.0051, 'tokenizer': 'bert-base-multilingual-cased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():418] starting backend
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():132] starting backend process...
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb b/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
-deleted file mode 100644
-index cd7ebea..0000000
-Binary files a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb and /dev/null differ
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py b/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-deleted file mode 100644
-index f15df21..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch b/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-deleted file mode 100644
-index 0ddeae0..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-+++ /dev/null
-@@ -1,226 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2158287 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,87 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..ee4c0ff 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..29be718 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..bda663d 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/output.log b/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-deleted file mode 100644
-index 4d74c7d..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt b/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-deleted file mode 100644
-index 9eb0f02..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:28:48.101605",
--    "startedAt": "2022-04-08T09:28:45.736549",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-deleted file mode 100644
-index 5708b15..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.139744758605957, "_runtime": 22, "_timestamp": 1649410147, "_step": 1, "epoch_loss": 7.139744758605957}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-deleted file mode 100644
-index e57e276..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,745 DEBUG   MainThread:63630 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send():179] send: header
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:46,531 DEBUG   SenderThread:63630 [sender.py:send():179] send: run
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:48,099 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():39] meta init
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():53] meta init done
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:probe():210] probe
--2022-04-08 14:58:48,107 DEBUG   HandlerThread:63630 [meta.py:_setup_git():200] setup git
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_save_code():89] save code
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_code():110] save code done
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_patches():127] save patches
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():57] save pip
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_conda():78] save conda
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:probe():252] probe done
--2022-04-08 14:58:49,727 DEBUG   SenderThread:63630 [sender.py:send():179] send: files
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,737 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:50,547 DEBUG   SenderThread:63630 [sender.py:send():179] send: config
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:05,549 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:05,549 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-deleted file mode 100644
-index a6875c4..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'd3rkwo1k', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml', 'start_method': 'thread'}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:45,738 INFO    MainThread:63630 [wandb_init.py:init():418] starting backend
--2022-04-08 14:58:45,743 INFO    MainThread:63630 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb b/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py b/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml b/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/config.yaml b/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-deleted file mode 100644
-index d5b49b7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 36
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/diff.patch b/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-deleted file mode 100644
-index 5bddede..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-+++ /dev/null
-@@ -1,228 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..f7a973d 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,89 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..151b958 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..80b3468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..abf5aa3 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145917-fjhaj183
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/output.log b/wandb/run-20220408_145917-fjhaj183/files/output.log
-deleted file mode 100644
-index ceeeb4b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt b/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-deleted file mode 100644
-index 705a1e7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:18.659644",
--    "startedAt": "2022-04-08T09:29:17.328450",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=36",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-deleted file mode 100644
-index 1749cae..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140841484069824, "_runtime": 16, "_timestamp": 1649410173, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log b/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-deleted file mode 100644
-index 6a2ea0b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,338 DEBUG   MainThread:63880 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send():179] send: header
--2022-04-08 14:59:17,342 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:17,943 DEBUG   SenderThread:63880 [sender.py:send():179] send: run
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:18,657 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():39] meta init
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:probe():210] probe
--2022-04-08 14:59:18,665 DEBUG   HandlerThread:63880 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_save_code():89] save code
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:probe():252] probe done
--2022-04-08 14:59:20,075 DEBUG   SenderThread:63880 [sender.py:send():179] send: files
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,086 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:20,978 DEBUG   SenderThread:63880 [sender.py:send():179] send: config
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: history
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug.log b/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-deleted file mode 100644
-index 5f71fa1..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjhaj183', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjhaj183.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 36, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:17,335 INFO    MainThread:63880 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb b/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py b/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml b/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml b/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-deleted file mode 100644
-index 39ea9ed..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 16
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch b/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-deleted file mode 100644
-index 3de404c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-+++ /dev/null
-@@ -1,230 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..1036f20 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,91 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..33a9122 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..622b540 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c775116 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/output.log b/wandb/run-20220408_145943-fjlzyv53/files/output.log
-deleted file mode 100644
-index 0a584f7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt b/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-deleted file mode 100644
-index 321b5fe..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:44.714511",
--    "startedAt": "2022-04-08T09:29:43.530748",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=16",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-deleted file mode 100644
-index 43fa534..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.180241584777832, "_runtime": 16, "_timestamp": 1649410199, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-deleted file mode 100644
-index 1bb5ef6..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,540 DEBUG   MainThread:64131 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send():179] send: header
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:43,999 DEBUG   SenderThread:64131 [sender.py:send():179] send: run
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:44,712 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():39] meta init
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:probe():210] probe
--2022-04-08 14:59:44,720 DEBUG   HandlerThread:64131 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:44,739 DEBUG   HandlerThread:64131 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:44,740 DEBUG   HandlerThread:64131 [meta.py:_save_code():89] save code
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:probe():252] probe done
--2022-04-08 14:59:46,122 DEBUG   SenderThread:64131 [sender.py:send():179] send: files
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,133 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,710 DEBUG   SenderThread:64131 [sender.py:send():179] send: config
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: history
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-deleted file mode 100644
-index 042323c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjlzyv53', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 1024, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:43,537 INFO    MainThread:64131 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb b/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py b/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml b/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150006-abict4v2/files/config.yaml b/wandb/run-20220408_150006-abict4v2/files/config.yaml
-deleted file mode 100644
-index 55505a9..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 20
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 8
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150006-abict4v2/files/diff.patch b/wandb/run-20220408_150006-abict4v2/files/diff.patch
-deleted file mode 100644
-index cae01c4..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/diff.patch
-+++ /dev/null
-@@ -1,232 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..a79a795 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,93 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..baa82b6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..79d1f8d 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..4572147 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150006-abict4v2
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/output.log b/wandb/run-20220408_150006-abict4v2/files/output.log
-deleted file mode 100644
-index 18438a2..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/output.log
-+++ /dev/null
-@@ -1,14 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:261: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
-diff --git a/wandb/run-20220408_150006-abict4v2/files/requirements.txt b/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json b/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-deleted file mode 100644
-index f46fef8..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:08.569102",
--    "startedAt": "2022-04-08T09:30:06.988517",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=20",
--        "--nhead=8",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json b/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-deleted file mode 100644
-index 4c47552..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.120020389556885, "_runtime": 21, "_timestamp": 1649410227, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log b/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-deleted file mode 100644
-index eb4114e..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-+++ /dev/null
-@@ -1,71 +0,0 @@
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,998 DEBUG   MainThread:64393 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send():179] send: header
--2022-04-08 15:00:07,002 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:07,447 DEBUG   SenderThread:64393 [sender.py:send():179] send: run
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,565 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:08,566 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:08,568 DEBUG   HandlerThread:64393 [meta.py:__init__():39] meta init
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:probe():210] probe
--2022-04-08 15:00:08,574 DEBUG   HandlerThread:64393 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_save_code():89] save code
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:probe():252] probe done
--2022-04-08 15:00:10,005 DEBUG   SenderThread:64393 [sender.py:send():179] send: files
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:11,189 DEBUG   SenderThread:64393 [sender.py:send():179] send: config
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:26,191 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:26,191 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: history
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug.log b/wandb/run-20220408_150006-abict4v2/logs/debug.log
-deleted file mode 100644
-index 2782e5f..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug.log
-+++ /dev/null
-@@ -1,51 +0,0 @@
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'abict4v2', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-abict4v2.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 20, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 8, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:06,990 INFO    MainThread:64393 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:06,995 INFO    MainThread:64393 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb b/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py b/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml b/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml b/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-deleted file mode 100644
-index ea14f0e..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch b/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-deleted file mode 100644
-index 47b804f..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-+++ /dev/null
-@@ -1,234 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2248477 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,95 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..165ed2c 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..f1325dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..1413293 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/output.log b/wandb/run-20220408_150037-ba0yl54z/files/output.log
-deleted file mode 100644
-index 6742216..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt b/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-deleted file mode 100644
-index 5a492ae..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:38.254663",
--    "startedAt": "2022-04-08T09:30:37.394479",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=64",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-deleted file mode 100644
-index 662ac89..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.082856178283691, "_runtime": 16, "_timestamp": 1649410253, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-deleted file mode 100644
-index 0c041a1..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,404 DEBUG   MainThread:64646 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 DEBUG   SenderThread:64646 [sender.py:send():179] send: header
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,410 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:37,410 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:37,611 DEBUG   SenderThread:64646 [sender.py:send():179] send: run
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:38,252 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():39] meta init
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:probe():210] probe
--2022-04-08 15:00:38,260 DEBUG   HandlerThread:64646 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_save_code():89] save code
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:probe():252] probe done
--2022-04-08 15:00:39,665 DEBUG   SenderThread:64646 [sender.py:send():179] send: files
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,676 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:39,676 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:40,430 DEBUG   SenderThread:64646 [sender.py:send():179] send: config
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: history
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-deleted file mode 100644
-index 4346748..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'ba0yl54z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 64, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 512, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:37,401 INFO    MainThread:64646 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb b/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py b/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml b/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml b/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-deleted file mode 100644
-index 546bdaa..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 16
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch b/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-deleted file mode 100644
-index c98ba4e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-+++ /dev/null
-@@ -1,285 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ea51a40 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,97 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f8e98b2 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..9304e2b 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b02872b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/output.log b/wandb/run-20220408_153004-dg43ixc4/files/output.log
-deleted file mode 100644
-index f49019d..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt b/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-deleted file mode 100644
-index 109e1b6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:05.796412",
--    "startedAt": "2022-04-08T10:00:04.837672",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=512",
--        "--epochs=16",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-deleted file mode 100644
-index 09cdda6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140233993530273, "_runtime": 15, "_timestamp": 1649412019, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-deleted file mode 100644
-index 9669aaf..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,848 DEBUG   MainThread:65348 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,851 DEBUG   SenderThread:65348 [sender.py:send():179] send: header
--2022-04-08 15:30:04,851 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:04,852 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,022 DEBUG   SenderThread:65348 [sender.py:send():179] send: run
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:05,794 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():39] meta init
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:probe():210] probe
--2022-04-08 15:30:05,802 DEBUG   HandlerThread:65348 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:05,821 DEBUG   HandlerThread:65348 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:05,822 DEBUG   HandlerThread:65348 [meta.py:_save_code():89] save code
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:probe():252] probe done
--2022-04-08 15:30:07,221 DEBUG   SenderThread:65348 [sender.py:send():179] send: files
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,232 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:07,233 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,677 DEBUG   SenderThread:65348 [sender.py:send():179] send: config
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: history
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-deleted file mode 100644
-index 66c14b1..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'dg43ixc4', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 16, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:04,845 INFO    MainThread:65348 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb b/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py b/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml b/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml b/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-deleted file mode 100644
-index 122f33a..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch b/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-deleted file mode 100644
-index 797f0a1..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-+++ /dev/null
-@@ -1,287 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..356076f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,99 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7b452fc 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..48b2ecd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..93be230 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/output.log b/wandb/run-20220408_153027-fwwd5rya/files/output.log
-deleted file mode 100644
-index e86aeca..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-17:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt b/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-deleted file mode 100644
-index dcac75d..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:27.794832",
--    "startedAt": "2022-04-08T10:00:27.031889",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=256",
--        "--epochs=40",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-deleted file mode 100644
-index e70a2b8..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-+++ /dev/null
-@@ -1,99 +0,0 @@
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,040 DEBUG   MainThread:65601 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,046 DEBUG   SenderThread:65601 [sender.py:send():179] send: header
--2022-04-08 15:30:27,046 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:27,047 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,254 DEBUG   SenderThread:65601 [sender.py:send():179] send: run
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: summary
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:27,792 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():39] meta init
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:probe():210] probe
--2022-04-08 15:30:27,800 DEBUG   HandlerThread:65601 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:27,819 DEBUG   HandlerThread:65601 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:27,820 DEBUG   HandlerThread:65601 [meta.py:_save_code():89] save code
--2022-04-08 15:30:27,828 DEBUG   HandlerThread:65601 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:27,829 DEBUG   HandlerThread:65601 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:probe():252] probe done
--2022-04-08 15:30:29,202 DEBUG   SenderThread:65601 [sender.py:send():179] send: files
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:29,214 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: config
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-deleted file mode 100644
-index 987c5d6..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-+++ /dev/null
-@@ -1,84 +0,0 @@
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fwwd5rya', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 256, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:27,038 INFO    MainThread:65601 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:27,039 INFO    MainThread:65601 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb b/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
-deleted file mode 100644
-index bfb12ff..0000000
-Binary files a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py b/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml b/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml b/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch b/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-deleted file mode 100644
-index bd71761..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-+++ /dev/null
-@@ -1,377 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..d3a775c 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,100 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..74ec524 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..c957937 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..287708f 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/output.log b/wandb/run-20220409_152616-3a3gw94y/files/output.log
-deleted file mode 100644
-index 13e9c3e..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt b/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-deleted file mode 100644
-index 20f0482..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:56:17.429229",
--    "startedAt": "2022-04-09T09:56:16.815816",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-deleted file mode 100644
-index 5602f92..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 16, "_timestamp": 1649498192, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-deleted file mode 100644
-index 2546fd3..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,824 DEBUG   MainThread:3266 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,828 DEBUG   SenderThread:3266 [sender.py:send():179] send: header
--2022-04-09 15:26:16,829 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:16,984 DEBUG   SenderThread:3266 [sender.py:send():179] send: run
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:17,426 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():39] meta init
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():53] meta init done
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:probe():210] probe
--2022-04-09 15:26:17,435 DEBUG   HandlerThread:3266 [meta.py:_setup_git():200] setup git
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_save_code():89] save code
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_code():110] save code done
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_patches():127] save patches
--2022-04-09 15:26:17,564 DEBUG   HandlerThread:3266 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:26:17,565 DEBUG   HandlerThread:3266 [meta.py:_save_pip():57] save pip
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_conda():78] save conda
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:probe():252] probe done
--2022-04-09 15:26:19,491 DEBUG   SenderThread:3266 [sender.py:send():179] send: files
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:26:19,497 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:19,831 DEBUG   SenderThread:3266 [sender.py:send():179] send: config
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: history
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-deleted file mode 100644
-index ebbf034..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():418] starting backend
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb b/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py b/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml b/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml b/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch b/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-deleted file mode 100644
-index c3ed101..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-+++ /dev/null
-@@ -1,379 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ed88fe4 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,102 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..4895794 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..1f9d48c 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..dfe2dcb 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/output.log b/wandb/run-20220409_152708-15jgzcwp/files/output.log
-deleted file mode 100644
-index 9a9a49f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt b/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-deleted file mode 100644
-index abaad7d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:57:09.613679",
--    "startedAt": "2022-04-09T09:57:08.966939",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-deleted file mode 100644
-index 0164a0d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 12, "_timestamp": 1649498241, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-deleted file mode 100644
-index de7918e..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,002 DEBUG   MainThread:3540 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,017 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send():179] send: header
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,109 DEBUG   SenderThread:3540 [sender.py:send():179] send: run
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:09,611 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():39] meta init
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():53] meta init done
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:probe():210] probe
--2022-04-09 15:27:09,619 DEBUG   HandlerThread:3540 [meta.py:_setup_git():200] setup git
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_save_code():89] save code
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_code():110] save code done
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_patches():127] save patches
--2022-04-09 15:27:09,693 DEBUG   HandlerThread:3540 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():57] save pip
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_conda():78] save conda
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,002 DEBUG   HandlerThread:3540 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:27:11,003 DEBUG   HandlerThread:3540 [meta.py:probe():252] probe done
--2022-04-09 15:27:11,004 DEBUG   SenderThread:3540 [sender.py:send():179] send: files
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,362 DEBUG   SenderThread:3540 [sender.py:send():179] send: config
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: history
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-deleted file mode 100644
-index 023162f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:27:08,971 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:08,974 INFO    MainThread:3540 [wandb_init.py:init():418] starting backend
--2022-04-09 15:27:08,994 INFO    MainThread:3540 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:27:08,996 INFO    MainThread:3540 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb b/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py b/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-deleted file mode 100644
-index 596bd8d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch b/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-deleted file mode 100644
-index edba74d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-+++ /dev/null
-@@ -1,457 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..6f7f3e6 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,180 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..596bd8d 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7064436 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..3ee4416 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..425ec98 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/output.log b/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-deleted file mode 100644
-index e872735..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt b/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-deleted file mode 100644
-index 39bdbe7..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:31:16.739157",
--    "startedAt": "2022-04-09T10:31:15.626079",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-deleted file mode 100644
-index 96a4906..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 14, "_timestamp": 1649500289, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-deleted file mode 100644
-index 2dc7db1..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,660 DEBUG   MainThread:6109 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 DEBUG   SenderThread:6109 [sender.py:send():179] send: header
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,673 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:01:15,673 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:15,970 DEBUG   SenderThread:6109 [sender.py:send():179] send: run
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:16,736 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():39] meta init
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():53] meta init done
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:probe():210] probe
--2022-04-09 16:01:16,745 DEBUG   HandlerThread:6109 [meta.py:_setup_git():200] setup git
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_save_code():89] save code
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_code():110] save code done
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_patches():127] save patches
--2022-04-09 16:01:16,811 DEBUG   HandlerThread:6109 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():57] save pip
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_conda():78] save conda
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:probe():252] probe done
--2022-04-09 16:01:18,150 DEBUG   SenderThread:6109 [sender.py:send():179] send: files
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,158 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:01:18,158 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,709 DEBUG   SenderThread:6109 [sender.py:send():179] send: config
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: history
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-deleted file mode 100644
-index 87f5666..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--2022-04-09 16:01:15,633 INFO    MainThread:6109 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():418] starting backend
--2022-04-09 16:01:15,655 INFO    MainThread:6109 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:01:15,656 INFO    MainThread:6109 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb b/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py b/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-deleted file mode 100644
-index feaf1fc..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch b/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-deleted file mode 100644
-index eec0ab3..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-+++ /dev/null
-@@ -1,459 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..8b42533 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,182 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..feaf1fc 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..e712296 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b2fc627 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..337b531 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/output.log b/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-deleted file mode 100644
-index e15e9a4..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-+++ /dev/null
-@@ -1,17 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt b/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-deleted file mode 100644
-index f4efc7b..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:32:47.190940",
--    "startedAt": "2022-04-09T10:32:46.030719",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-deleted file mode 100644
-index 59ceedf..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 18, "_timestamp": 1649500384, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-deleted file mode 100644
-index 4dae842..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,040 DEBUG   MainThread:6410 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send():179] send: header
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:46,151 DEBUG   SenderThread:6410 [sender.py:send():179] send: run
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:47,188 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():39] meta init
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():53] meta init done
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:probe():210] probe
--2022-04-09 16:02:47,197 DEBUG   HandlerThread:6410 [meta.py:_setup_git():200] setup git
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_save_code():89] save code
--2022-04-09 16:02:47,224 DEBUG   HandlerThread:6410 [meta.py:_save_code():110] save code done
--2022-04-09 16:02:47,225 DEBUG   HandlerThread:6410 [meta.py:_save_patches():127] save patches
--2022-04-09 16:02:47,270 DEBUG   HandlerThread:6410 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():57] save pip
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_conda():78] save conda
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:probe():252] probe done
--2022-04-09 16:02:48,639 DEBUG   SenderThread:6410 [sender.py:send():179] send: files
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,649 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:49,267 DEBUG   SenderThread:6410 [sender.py:send():179] send: config
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,268 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:03:04,269 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:03:04,791 DEBUG   SenderThread:6410 [sender.py:send():179] send: history
--2022-04-09 16:03:04,792 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-deleted file mode 100644
-index c4edd31..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():418] starting backend
--2022-04-09 16:02:46,037 INFO    MainThread:6410 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb b/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py b/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-deleted file mode 100644
-index 182fd97..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-+++ /dev/null
-@@ -1,378 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch b/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-deleted file mode 100644
-index 2c51f6a..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-+++ /dev/null
-@@ -1,470 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..507a499 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,192 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..182fd97 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,98 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..2224b92 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..94d02b9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f7361e5 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/output.log b/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-deleted file mode 100644
-index 35bceac..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-+++ /dev/null
-@@ -1,18 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt b/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-deleted file mode 100644
-index 440569b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:34:10.122598",
--    "startedAt": "2022-04-09T10:34:09.149412",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-deleted file mode 100644
-index 52da06b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 27, "_timestamp": 1649500476, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-deleted file mode 100644
-index bf89eff..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,159 DEBUG   MainThread:6703 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send():179] send: header
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:09,250 DEBUG   SenderThread:6703 [sender.py:send():179] send: run
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:10,119 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():39] meta init
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():53] meta init done
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:probe():210] probe
--2022-04-09 16:04:10,130 DEBUG   HandlerThread:6703 [meta.py:_setup_git():200] setup git
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_save_code():89] save code
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_code():110] save code done
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_patches():127] save patches
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_pip():57] save pip
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_conda():78] save conda
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:probe():252] probe done
--2022-04-09 16:04:11,658 DEBUG   SenderThread:6703 [sender.py:send():179] send: files
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,667 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:11,669 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:12,396 DEBUG   SenderThread:6703 [sender.py:send():179] send: config
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:27,397 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:27,397 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: history
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:39,168 DEBUG   SenderThread:6703 [sender.py:send():179] send: stats
--2022-04-09 16:04:44,241 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:44,241 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:59,736 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:59,737 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-deleted file mode 100644
-index 0fbab81..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-+++ /dev/null
-@@ -1,54 +0,0 @@
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():418] starting backend
--2022-04-09 16:04:09,156 INFO    MainThread:6703 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:04:09,157 INFO    MainThread:6703 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb b/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
-deleted file mode 100644
-index 81c67b9..0000000
-Binary files a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py b/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml b/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/config.yaml b/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-deleted file mode 100644
-index 1ebd7db..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/diff.patch b/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-deleted file mode 100644
-index 9c4e2ae..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-+++ /dev/null
-@@ -1,482 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2d0dffc 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,202 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..18dd535 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b8703a2 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7af087b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160908-2097uoqw
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/output.log b/wandb/run-20220409_160908-2097uoqw/files/output.log
-deleted file mode 100644
-index ed7c7b5..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt b/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-deleted file mode 100644
-index 3cf53b0..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:39:09.049034",
--    "startedAt": "2022-04-09T10:39:08.174640",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-deleted file mode 100644
-index 225791e..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5264.9873046875, "_runtime": 162, "_timestamp": 1649500910, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log b/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-deleted file mode 100644
-index 1baf812..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-+++ /dev/null
-@@ -1,1238 +0,0 @@
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,183 DEBUG   MainThread:7244 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 DEBUG   SenderThread:7244 [sender.py:send():179] send: header
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,187 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:09:08,187 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:08,556 DEBUG   SenderThread:7244 [sender.py:send():179] send: run
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:09,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():39] meta init
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():53] meta init done
--2022-04-09 16:09:09,049 DEBUG   HandlerThread:7244 [meta.py:probe():210] probe
--2022-04-09 16:09:09,055 DEBUG   HandlerThread:7244 [meta.py:_setup_git():200] setup git
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_save_code():89] save code
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_code():110] save code done
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_patches():127] save patches
--2022-04-09 16:09:09,148 DEBUG   HandlerThread:7244 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:09:09,149 DEBUG   HandlerThread:7244 [meta.py:_save_pip():57] save pip
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_conda():78] save conda
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:probe():252] probe done
--2022-04-09 16:09:10,559 DEBUG   SenderThread:7244 [sender.py:send():179] send: files
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,033 DEBUG   SenderThread:7244 [sender.py:send():179] send: config
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:26,037 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:26,037 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:37,780 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:41,491 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:41,492 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:08,466 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:10:12,367 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:12,368 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:27,818 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:27,818 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:43,478 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:43,478 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,373 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:05,374 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:08,654 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:14,750 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:14,750 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:32,169 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:32,169 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:39,457 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:48,462 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:48,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:03,967 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:12:03,968 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
--2022-04-09 16:12:05,938 INFO    MainThread:7244 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 16:12:05,939 INFO    MainThread:7244 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:12:06,150 DEBUG   SenderThread:7244 [sender.py:send():179] send: telemetry
--2022-04-09 16:12:06,151 DEBUG   SenderThread:7244 [sender.py:send():179] send: exit
--2022-04-09 16:12:06,151 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():295] send defer
--2022-04-09 16:12:06,153 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:06,155 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,155 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 16:12:06,155 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:06,156 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 16:12:06,158 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,158 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 16:12:06,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:12:06,227 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,227 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 16:12:06,228 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,228 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 16:12:06,229 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,229 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 16:12:06,229 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,229 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 16:12:06,259 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,450 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:06,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:07,230 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 16:12:07,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,231 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,231 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 16:12:07,231 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:07,232 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:12:07,333 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:07,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:07,453 INFO    SenderThread:7244 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt requirements.txt
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:12:07,455 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log output.log
--2022-04-09 16:12:07,456 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:12:07,457 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:12:07,467 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml config.yaml
--2022-04-09 16:12:07,468 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch diff.patch
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 16:12:07,508 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,510 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,510 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 16:12:07,510 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:07,511 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 16:12:07,512 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,512 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 16:12:07,512 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,513 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 16:12:07,612 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,484 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 16:12:08,485 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,486 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,486 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 16:12:08,487 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 16:12:08,487 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41552
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,489 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,489 DEBUG   SenderThread:7244 [sender.py:send():179] send: final
--2022-04-09 16:12:08,490 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send():179] send: footer
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,490 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 16:12:08,591 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,591 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,593 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,695 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,695 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,696 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,798 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,798 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,799 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,848 INFO    Thread-33 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:08,900 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,901 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,902 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,004 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,005 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,006 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,108 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,109 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,110 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,212 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,213 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,214 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,316 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,317 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,318 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,420 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,421 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,422 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,524 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,525 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,526 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,628 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,629 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,630 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,732 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,733 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,734 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,837 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,838 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,840 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,875 INFO    Thread-32 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:09,942 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,942 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,944 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,046 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,047 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,149 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,150 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,151 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,253 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,254 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,255 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,304 INFO    Thread-29 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:12:10,357 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,358 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,359 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,461 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,463 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,772 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,772 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,772 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,874 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,874 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,876 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,978 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,979 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,980 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,082 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,082 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,084 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,186 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,186 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,188 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,290 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,290 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,292 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,314 INFO    Thread-30 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:11,394 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,394 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,396 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,498 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,499 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,500 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,602 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,603 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,604 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,706 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,707 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,708 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,810 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,810 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,812 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,914 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,915 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,916 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,018 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,019 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,020 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,122 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,122 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,124 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,226 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,228 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,330 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,330 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,332 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,434 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,435 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,436 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,538 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,538 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,540 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,642 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,642 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,644 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,746 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,746 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,747 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,850 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,850 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,852 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,954 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,954 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,955 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,057 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,058 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,059 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,161 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,162 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,163 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,265 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,266 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,267 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,369 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,370 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,371 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,473 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,473 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,475 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,577 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,577 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,578 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,680 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,681 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,682 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,784 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,785 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,786 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,888 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,889 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,890 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,992 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,993 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,994 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,096 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,097 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,098 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,200 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,201 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,202 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,304 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,305 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,307 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,409 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,410 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,411 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,513 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,514 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,515 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,617 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,618 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,619 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,721 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,721 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,723 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,826 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,827 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,829 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,931 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,931 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,933 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,034 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,035 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,037 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,138 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,139 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,141 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,244 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,244 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,245 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,348 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,348 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,350 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,453 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,454 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,461 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,773 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,773 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,775 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,877 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,877 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,879 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,981 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,982 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,983 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,085 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,086 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,087 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,189 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,190 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,191 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,293 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,294 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,295 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,397 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,398 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,399 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,501 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,502 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,503 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,605 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,606 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,607 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,709 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,710 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,711 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,813 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,814 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,816 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,918 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,919 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,920 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,022 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,023 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,024 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,126 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,127 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,128 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,230 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,232 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,334 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,335 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,336 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,374 INFO    Thread-31 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:12:17,438 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,438 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,440 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,542 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,543 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,544 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,646 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,647 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,647 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:17,648 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,650 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 16:12:17,653 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 16:12:17,656 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 16:12:17,656 INFO    HandlerThread:7244 [handler.py:finish():638] shutting down handler
--2022-04-09 16:12:18,493 INFO    WriterThread:7244 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:12:18,647 INFO    SenderThread:7244 [sender.py:finish():933] shutting down sender
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:18,661 INFO    MainThread:7244 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 16:12:18,662 INFO    MainThread:7244 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 16:12:18,663 INFO    MainThread:7244 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 16:12:18,709 INFO    MainThread:7244 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug.log b/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-deleted file mode 100644
-index ad8f755..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-+++ /dev/null
-@@ -1,77 +0,0 @@
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug.log
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():418] starting backend
--2022-04-09 16:09:08,180 INFO    MainThread:7244 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
-diff --git a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb b/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
-deleted file mode 100644
-index b5995f1..0000000
-Binary files a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py b/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml b/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/config.yaml b/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/diff.patch b/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-deleted file mode 100644
-index aa6c773..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-+++ /dev/null
-@@ -1,528 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2aaecf9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,248 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..91bb884 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..252e468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c99b343 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_161421-3t82t88x
--\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/output.log b/wandb/run-20220409_161421-3t82t88x/files/output.log
-deleted file mode 100644
-index 3bf650b..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/output.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt b/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-deleted file mode 100644
-index f9df6f1..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:44:23.094487",
--    "startedAt": "2022-04-09T10:44:21.821617",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log b/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-deleted file mode 100644
-index 3f70132..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,831 DEBUG   MainThread:8815 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send():179] send: header
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:21,939 DEBUG   SenderThread:8815 [sender.py:send():179] send: run
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,090 DEBUG   SenderThread:8815 [sender.py:send():179] send: summary
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:23,092 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():39] meta init
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():53] meta init done
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:probe():210] probe
--2022-04-09 16:14:23,100 DEBUG   HandlerThread:8815 [meta.py:_setup_git():200] setup git
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_save_code():89] save code
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_code():110] save code done
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_patches():127] save patches
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_pip():57] save pip
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_conda():78] save conda
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,537 DEBUG   HandlerThread:8815 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:14:24,538 DEBUG   HandlerThread:8815 [meta.py:probe():252] probe done
--2022-04-09 16:14:24,539 DEBUG   SenderThread:8815 [sender.py:send():179] send: files
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,548 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:24,548 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:25,577 DEBUG   SenderThread:8815 [sender.py:send():179] send: config
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:40,579 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:40,579 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:51,743 DEBUG   SenderThread:8815 [sender.py:send():179] send: stats
--2022-04-09 16:14:56,424 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:56,424 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:15:01,820 DEBUG   SenderThread:8815 [sender.py:send():179] send: history
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug.log b/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-deleted file mode 100644
-index 99b6b97..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug.log
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():418] starting backend
--2022-04-09 16:14:21,828 INFO    MainThread:8815 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb b/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
-deleted file mode 100644
-index a4486ce..0000000
-Binary files a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py b/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml b/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/config.yaml b/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/diff.patch b/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-deleted file mode 100644
-index 9eddab1..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-+++ /dev/null
-@@ -1,560 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..353da1f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,249 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f0332eb 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..97853e9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7be71e2 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_162621-m83puhmm
--\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/output.log b/wandb/run-20220409_162621-m83puhmm/files/output.log
-deleted file mode 100644
-index ee1c9e3..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/output.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt b/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-deleted file mode 100644
-index 4ce8f76..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:56:22.902051",
--    "startedAt": "2022-04-09T10:56:21.924771",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log b/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-deleted file mode 100644
-index 7032449..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,934 DEBUG   MainThread:9280 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:26:21,937 DEBUG   SenderThread:9280 [sender.py:send():179] send: header
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:21,938 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,344 DEBUG   SenderThread:9280 [sender.py:send():179] send: run
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,889 DEBUG   SenderThread:9280 [sender.py:send():179] send: summary
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:22,895 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():39] meta init
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():53] meta init done
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:probe():210] probe
--2022-04-09 16:26:22,908 DEBUG   HandlerThread:9280 [meta.py:_setup_git():200] setup git
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_save_code():89] save code
--2022-04-09 16:26:22,972 DEBUG   HandlerThread:9280 [meta.py:_save_code():110] save code done
--2022-04-09 16:26:22,973 DEBUG   HandlerThread:9280 [meta.py:_save_patches():127] save patches
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():57] save pip
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_conda():78] save conda
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:probe():252] probe done
--2022-04-09 16:26:24,440 DEBUG   SenderThread:9280 [sender.py:send():179] send: files
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:24,448 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:24,898 DEBUG   SenderThread:9280 [sender.py:send():179] send: config
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:39,905 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:39,905 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:51,624 DEBUG   SenderThread:9280 [sender.py:send():179] send: stats
--2022-04-09 16:26:55,340 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:55,340 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:27:06,912 DEBUG   SenderThread:9280 [sender.py:send():179] send: history
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug.log b/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-deleted file mode 100644
-index 5053427..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():418] starting backend
--2022-04-09 16:26:21,931 INFO    MainThread:9280 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb b/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
-deleted file mode 100644
-index 978cbe5..0000000
-Binary files a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py b/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-deleted file mode 100644
-index 1988ff1..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 1
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 1
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch b/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-deleted file mode 100644
-index d503875..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-+++ /dev/null
-@@ -1,561 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..b0966e9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,250 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..1486dd6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..071678f 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..be8b91a 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf
--\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/output.log b/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-deleted file mode 100644
-index f4f17d5..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt b/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-deleted file mode 100644
-index 6c00633..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:09:01.944494",
--    "startedAt": "2022-04-09T12:09:01.199712",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-deleted file mode 100644
-index c0804b4..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5045.823547363281, "_runtime": 154, "_timestamp": 1649506295, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-deleted file mode 100644
-index 67f5897..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-+++ /dev/null
-@@ -1,418 +0,0 @@
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,208 DEBUG   MainThread:10760 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send():179] send: header
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,337 DEBUG   SenderThread:10760 [sender.py:send():179] send: run
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:01,942 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():39] meta init
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():53] meta init done
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:probe():210] probe
--2022-04-09 17:39:01,950 DEBUG   HandlerThread:10760 [meta.py:_setup_git():200] setup git
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_save_code():89] save code
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_code():110] save code done
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_patches():127] save patches
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():57] save pip
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_conda():78] save conda
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:probe():252] probe done
--2022-04-09 17:39:03,362 DEBUG   SenderThread:10760 [sender.py:send():179] send: files
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,372 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:03,372 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,822 DEBUG   SenderThread:10760 [sender.py:send():179] send: config
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:18,825 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:18,826 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:30,755 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:39:34,298 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:34,298 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:01,384 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:05,203 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:05,204 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,724 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:20,725 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,136 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:27,137 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:32,273 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:36,248 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:36,249 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:51,681 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:51,682 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:02,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,142 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:07,142 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:22,870 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:22,871 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:33,728 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,321 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:38,322 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: telemetry
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: exit
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():295] send defer
--2022-04-09 17:41:51,004 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,005 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,006 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,006 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 17:41:51,007 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,008 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,008 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 17:41:51,009 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 17:41:51,009 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,010 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 17:41:51,062 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,062 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:51,063 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,063 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 17:41:51,064 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,064 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 17:41:51,064 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 17:41:51,065 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,065 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 17:41:51,109 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,203 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:51,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:51,546 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 17:41:51,546 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,546 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,546 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,546 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 17:41:51,547 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 17:41:51,648 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt requirements.txt
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log output.log
--2022-04-09 17:41:52,208 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 17:41:52,209 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json wandb-summary.json
--2022-04-09 17:41:52,218 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml config.yaml
--2022-04-09 17:41:52,220 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch diff.patch
--2022-04-09 17:41:52,222 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py code/train_translation.py
--2022-04-09 17:41:52,224 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 17:41:52,224 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 17:41:52,225 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 17:41:52,225 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,226 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,226 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 17:41:52,328 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,842 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 17:41:52,842 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,844 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,844 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 17:41:52,845 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,846 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 17:41:52,848 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,848 DEBUG   SenderThread:10760 [sender.py:send():179] send: final
--2022-04-09 17:41:52,849 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 17:41:52,849 DEBUG   SenderThread:10760 [sender.py:send():179] send: footer
--2022-04-09 17:41:52,850 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,850 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 17:41:52,947 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,947 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,948 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,049 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,050 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,051 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 45730
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,153 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,153 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,155 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,256 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,257 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,258 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,360 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,361 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,362 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,464 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,465 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,466 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,502 INFO    Thread-33 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:53,504 INFO    Thread-29 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:41:53,512 INFO    Thread-32 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:53,524 INFO    Thread-31 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:41:53,568 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,568 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,569 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,671 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,672 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,673 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,775 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,776 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,777 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,879 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,879 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,881 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,983 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,983 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,984 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,033 INFO    Thread-30 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:54,086 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,087 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,088 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,190 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,190 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,192 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,294 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,294 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,294 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:54,295 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,297 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 17:41:54,299 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 17:41:54,302 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 17:41:54,302 INFO    HandlerThread:10760 [handler.py:finish():638] shutting down handler
--2022-04-09 17:41:54,849 INFO    WriterThread:10760 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [sender.py:finish():933] shutting down sender
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:55,308 INFO    MainThread:10760 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 17:41:55,309 INFO    MainThread:10760 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 17:41:55,310 INFO    MainThread:10760 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 17:41:55,323 INFO    MainThread:10760 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-deleted file mode 100644
-index 2ea4289..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-+++ /dev/null
-@@ -1,73 +0,0 @@
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():418] starting backend
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb b/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
-deleted file mode 100644
-index c939775..0000000
-Binary files a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py b/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml b/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/config.yaml b/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-deleted file mode 100644
-index 0b2ef04..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 24
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/diff.patch b/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-deleted file mode 100644
-index a6f8b6d..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-+++ /dev/null
-@@ -1,634 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e11eb21 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,302 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..a3e7597 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..453b7bc 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b2d6ded 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_175151-z44hpswp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/output.log b/wandb/run-20220409_175151-z44hpswp/files/output.log
-deleted file mode 100644
-index 2224687..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/output.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--translation model saved in checkpoint
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt b/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-deleted file mode 100644
-index e3bc5e0..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:21:52.829321",
--    "startedAt": "2022-04-09T12:21:51.786614",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=24",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-deleted file mode 100644
-index 4d8b4c3..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 107.22583770751953, "_runtime": 695, "_timestamp": 1649507606, "_step": 28, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log b/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-deleted file mode 100644
-index 552d2f2..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,620 +0,0 @@
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,796 DEBUG   MainThread:14720 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send():179] send: header
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,171 DEBUG   SenderThread:14720 [sender.py:send():179] send: run
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,825 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:52,827 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():39] meta init
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():53] meta init done
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:probe():210] probe
--2022-04-09 17:51:52,837 DEBUG   HandlerThread:14720 [meta.py:_setup_git():200] setup git
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_save_code():89] save code
--2022-04-09 17:51:52,876 DEBUG   HandlerThread:14720 [meta.py:_save_code():110] save code done
--2022-04-09 17:51:52,877 DEBUG   HandlerThread:14720 [meta.py:_save_patches():127] save patches
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():57] save pip
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_conda():78] save conda
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:probe():252] probe done
--2022-04-09 17:51:54,261 DEBUG   SenderThread:14720 [sender.py:send():179] send: files
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,272 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:51:54,272 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: config
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:09,721 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:09,721 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:21,569 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:25,148 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:25,149 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:52,213 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,140 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:56,140 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:11,596 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:11,597 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:23,054 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:27,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:27,074 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:42,499 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:42,500 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:53,596 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:57,929 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:57,929 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:59,413 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:59,414 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:13,359 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:13,359 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,344 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:54:20,345 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:24,527 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:28,793 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:28,793 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:44,227 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:44,227 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:55,062 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:59,653 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:59,653 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:11,338 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:11,339 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:15,098 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:15,099 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:25,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:30,519 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:30,519 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:45,955 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:45,956 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:56,468 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:01,589 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:17,078 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:17,078 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:27,343 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:56:32,522 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:32,522 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:47,961 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:47,961 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:57,925 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:03,390 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:03,390 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:18,853 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:18,853 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:28,552 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:34,280 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:34,280 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:49,734 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:49,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:59,325 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,341 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:05,342 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:20,790 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:20,790 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:29,955 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:36,214 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:36,214 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:51,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:51,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:00,845 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:07,147 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:07,147 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:22,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:22,588 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:31,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:38,008 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:38,008 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:53,449 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:53,450 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:02,140 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:08,884 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:08,884 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:13,617 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:13,618 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:24,366 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:24,367 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:32,786 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:39,806 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:39,806 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,224 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:55,225 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,715 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:00,716 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:03,610 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:10,649 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:10,649 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:26,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:26,073 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:34,217 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:41,491 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:41,492 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,993 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:43,994 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:56,918 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:56,918 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:04,763 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:12,340 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:12,340 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:35,408 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:43,201 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:43,201 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:44,434 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:44,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:58,647 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:58,647 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:06,291 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:14,117 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:14,117 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,051 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:26,052 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:29,557 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:29,559 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:36,939 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:03:42,324 INFO    MainThread:14720 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:03:43,079 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:43,080 DEBUG   SenderThread:14720 [sender.py:send():179] send: telemetry
--2022-04-09 18:03:43,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:43,580 DEBUG   SenderThread:14720 [sender.py:send():179] send: exit
--2022-04-09 18:03:43,580 INFO    SenderThread:14720 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:03:43,581 INFO    SenderThread:14720 [sender.py:send_exit():295] send defer
--2022-04-09 18:03:43,581 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:43,582 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,583 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:03:43,583 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:03:43,584 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 48639
--}
--
--2022-04-09 18:03:43,585 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,586 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:03:43,657 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,657 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:43,658 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,658 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:03:43,660 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,660 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:03:43,686 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:44,248 INFO    SenderThread:14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt requirements.txt
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log output.log
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml config.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch diff.patch
--2022-04-09 18:03:44,251 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:03:44,253 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:03:44,253 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,254 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,258 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:03:44,260 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,260 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:03:44,261 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,261 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:03:44,261 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,261 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:03:44,361 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,907 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:03:44,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,908 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,908 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:03:44,909 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,909 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:03:44,910 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,910 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: final
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: footer
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,911 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:03:45,010 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,011 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,012 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,115 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,116 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,117 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,219 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,219 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,221 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,323 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,323 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,325 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,427 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,427 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,428 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,466 INFO    Thread-54 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 18:03:45,472 INFO    Thread-52 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 18:03:45,476 INFO    Thread-53 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:45,530 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,531 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,532 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,636 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,738 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,739 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,740 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,842 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,842 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,844 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,946 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,946 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,948 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,050 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,051 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,053 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,155 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,156 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,157 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,184 INFO    Thread-56 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:46,188 INFO    Thread-55 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:46,259 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,259 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,261 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,363 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,364 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,365 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,468 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,469 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,469 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:46,470 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,472 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:03:46,474 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:03:46,477 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:03:46,478 INFO    HandlerThread:14720 [handler.py:finish():638] shutting down handler
--2022-04-09 18:03:46,911 INFO    WriterThread:14720 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 18:03:47,469 INFO    SenderThread:14720 [sender.py:finish():933] shutting down sender
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:47,483 INFO    MainThread:14720 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:03:47,484 INFO    MainThread:14720 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:03:47,485 INFO    MainThread:14720 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:03:47,525 INFO    MainThread:14720 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug.log b/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-deleted file mode 100644
-index bb769fe..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-+++ /dev/null
-@@ -1,140 +0,0 @@
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'z44hpswp', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-z44hpswp.yaml', 'start_method': 'thread'}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug.log
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 24, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():418] starting backend
--2022-04-09 17:51:51,793 INFO    MainThread:14720 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
-diff --git a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb b/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
-deleted file mode 100644
-index 55f1aff..0000000
-Binary files a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py b/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml b/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml b/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-deleted file mode 100644
-index 194d831..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch b/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-deleted file mode 100644
-index 979dcc5..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-+++ /dev/null
-@@ -1,645 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..42fbde8 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,313 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..371ace5 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..a6d9884 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..705068b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z
--\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/output.log b/wandb/run-20220409_180353-vjrenr4z/files/output.log
-deleted file mode 100644
-index a2bf91c..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/output.log
-+++ /dev/null
-@@ -1,102 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--translation model saved in checkpoint
--{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--translation model saved in checkpoint
--{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--translation model saved in checkpoint
--{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--translation model saved in checkpoint
--{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--translation model saved in checkpoint
--{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--translation model saved in checkpoint
--{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--translation model saved in checkpoint
--{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--translation model saved in checkpoint
--{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--translation model saved in checkpoint
--{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--translation model saved in checkpoint
--{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--translation model saved in checkpoint
--{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--translation model saved in checkpoint
--{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--translation model saved in checkpoint
--{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--translation model saved in checkpoint
--{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--translation model saved in checkpoint
--{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--translation model saved in checkpoint
--{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--translation model saved in checkpoint
--{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--translation model saved in checkpoint
--{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--translation model saved in checkpoint
--{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--translation model saved in checkpoint
--{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--translation model saved in checkpoint
--{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--translation model saved in checkpoint
--{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--translation model saved in checkpoint
--{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--translation model saved in checkpoint
--{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--translation model saved in checkpoint
--{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--translation model saved in checkpoint
--{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--translation model saved in checkpoint
--{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--translation model saved in checkpoint
--{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--translation model saved in checkpoint
--{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--translation model saved in checkpoint
--{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--translation model saved in checkpoint
--{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--translation model saved in checkpoint
--{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--translation model saved in checkpoint
--{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--translation model saved in checkpoint
--{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--translation model saved in checkpoint
--{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt b/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-deleted file mode 100644
-index 3e24107..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:33:55.138080",
--    "startedAt": "2022-04-09T12:33:53.912960",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=40",
--        "--nhead=4",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-deleted file mode 100644
-index dbd5bb9..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 571.8498382568359, "_runtime": 1394, "_timestamp": 1649509027, "_step": 47, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-deleted file mode 100644
-index 6ac5722..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,809 +0,0 @@
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,947 DEBUG   MainThread:18842 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 DEBUG   SenderThread:18842 [sender.py:send():179] send: header
--2022-04-09 18:03:53,957 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:53,958 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:54,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: run
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:55,130 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():39] meta init
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():53] meta init done
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:probe():210] probe
--2022-04-09 18:03:55,146 DEBUG   HandlerThread:18842 [meta.py:_setup_git():200] setup git
--2022-04-09 18:03:55,213 DEBUG   HandlerThread:18842 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:03:55,214 DEBUG   HandlerThread:18842 [meta.py:_save_code():89] save code
--2022-04-09 18:03:55,241 DEBUG   HandlerThread:18842 [meta.py:_save_code():110] save code done
--2022-04-09 18:03:55,242 DEBUG   HandlerThread:18842 [meta.py:_save_patches():127] save patches
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():57] save pip
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_conda():78] save conda
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,710 DEBUG   HandlerThread:18842 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:03:56,711 DEBUG   HandlerThread:18842 [meta.py:probe():252] probe done
--2022-04-09 18:03:56,713 DEBUG   SenderThread:18842 [sender.py:send():179] send: files
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,723 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:56,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: config
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:12,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:12,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:23,959 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:27,637 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:27,637 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:43,070 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:43,071 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:54,578 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:58,609 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:58,609 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,096 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:14,096 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:25,318 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:05:29,536 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:29,536 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,041 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:45,042 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:55,878 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:00,385 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:00,385 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,115 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:12,116 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:15,812 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:15,812 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:26,509 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:31,252 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:31,252 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:46,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:46,699 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:57,088 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:02,128 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:02,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:17,560 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:17,560 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:27,788 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:33,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:33,039 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:58,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:03,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:03,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:10,495 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:10,496 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,773 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:16,774 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:19,358 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:19,358 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:29,127 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:34,827 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:34,827 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:50,258 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:50,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:59,791 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:05,625 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:05,625 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:21,079 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:21,079 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:30,544 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:36,425 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:36,426 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,629 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:37,630 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:51,758 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:51,758 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:01,192 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:07,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:22,576 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:22,576 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,752 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:37,928 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:37,928 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:02,406 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:08,610 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:08,610 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:23,966 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:23,966 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:33,001 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:39,600 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:39,600 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:54,944 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:54,944 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:03,627 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:10,280 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:10,280 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:25,635 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:25,635 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:34,297 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:40,989 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:40,989 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:56,322 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:56,323 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:05,226 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:11,687 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:11,687 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:27,035 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:27,035 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:35,749 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:42,474 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:42,475 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:57,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:06,507 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:13,240 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:13,240 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,985 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:26,986 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:28,667 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:28,668 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:37,148 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:44,310 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:44,310 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:59,666 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:59,666 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:07,695 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:14,998 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:14,998 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:30,334 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:30,334 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:38,429 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:45,673 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:45,673 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:01,020 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:01,020 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:09,031 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:16,349 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:16,349 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:39,689 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:47,261 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:47,261 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:02,605 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:02,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:10,351 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:17,935 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:17,935 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:33,308 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:33,308 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,998 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:44,097 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:44,098 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:48,657 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:48,817 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:11,869 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:20,065 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:20,065 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,258 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:50,780 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:50,780 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:06,176 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:06,176 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:12,884 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:21,533 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:21,533 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:43,542 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:52,222 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:52,222 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:07,575 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:07,575 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:14,395 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:22,919 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:22,920 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:38,284 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:38,284 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:44,947 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:53,719 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:53,719 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:09,154 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:09,154 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:15,554 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:24,513 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:24,513 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,048 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:32,049 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:39,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:39,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:46,176 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:55,292 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:55,292 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:10,678 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:10,679 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:16,761 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:26,337 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:26,337 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:41,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:41,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:43,842 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:43,843 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:47,574 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:57,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:57,038 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:12,473 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:12,473 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:18,151 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:27,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:27,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:43,266 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:43,266 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:48,907 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:58,729 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:58,729 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,447 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:03,448 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:14,167 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:14,167 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:19,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:29,519 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:29,520 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:44,877 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:44,877 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:50,128 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:00,259 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:00,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:20,792 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:30,948 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:30,948 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,976 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:38,977 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:46,374 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:46,374 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:51,548 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:01,722 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:01,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:03,261 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:03,262 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:17,072 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:17,072 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:22,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:26:32,410 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:32,411 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:47,810 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:47,810 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:52,753 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,241 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:03,241 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:18,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:18,700 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:23,342 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:34,106 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:34,107 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
--2022-04-09 18:27:39,696 INFO    MainThread:18842 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:27:39,697 INFO    MainThread:18842 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:27:40,003 DEBUG   SenderThread:18842 [sender.py:send():179] send: telemetry
--2022-04-09 18:27:40,004 DEBUG   SenderThread:18842 [sender.py:send():179] send: exit
--2022-04-09 18:27:40,005 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,005 INFO    SenderThread:18842 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:27:40,006 INFO    SenderThread:18842 [sender.py:send_exit():295] send defer
--2022-04-09 18:27:40,006 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,008 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,008 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:27:40,008 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,010 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:27:40,011 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,011 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:40,067 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,067 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:27:40,069 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,069 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:27:40,110 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:40,461 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:27:40,462 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,463 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,464 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:27:40,464 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,465 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,465 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:27:40,466 INFO    SenderThread:18842 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:27:40,566 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:41,202 INFO    SenderThread:18842 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:27:41,205 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt requirements.txt
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log output.log
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:27:41,207 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml config.yaml
--2022-04-09 18:27:41,211 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch diff.patch
--2022-04-09 18:27:41,220 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:27:41,223 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:27:41,224 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,225 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,225 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:27:41,225 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,226 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,226 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:27:41,230 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:41,231 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:27:41,232 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,232 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:27:41,232 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,232 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:27:41,332 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,915 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:27:41,915 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,917 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,917 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:27:41,918 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,919 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:27:41,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,921 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:27:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: final
--2022-04-09 18:27:41,922 DEBUG   SenderThread:18842 [sender.py:send():179] send: footer
--2022-04-09 18:27:41,923 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,923 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:27:42,024 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,024 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,025 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,127 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,129 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,231 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,231 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,233 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,335 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,335 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,336 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,438 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,439 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,440 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,542 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,542 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,544 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,592 INFO    Thread-73 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:27:42,594 INFO    Thread-71 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:27:42,599 INFO    Thread-75 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:42,601 INFO    Thread-72 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:42,602 INFO    Thread-74 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:42,645 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,645 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,646 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,747 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,748 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,749 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,851 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,851 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,852 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:42,853 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,855 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:27:42,857 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:27:42,860 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:27:42,861 INFO    HandlerThread:18842 [handler.py:finish():638] shutting down handler
--2022-04-09 18:27:42,922 INFO    WriterThread:18842 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:27:43,852 INFO    SenderThread:18842 [sender.py:finish():933] shutting down sender
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:27:43,868 INFO    MainThread:18842 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:27:43,884 INFO    MainThread:18842 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-deleted file mode 100644
-index 55b000f..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-+++ /dev/null
-@@ -1,230 +0,0 @@
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'vjrenr4z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml', 'start_method': 'thread'}
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:53,921 INFO    MainThread:18842 [wandb_init.py:init():418] starting backend
--2022-04-09 18:03:53,941 INFO    MainThread:18842 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:03:53,943 INFO    MainThread:18842 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
-diff --git a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb b/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
-deleted file mode 100644
-index 2a205f7..0000000
-Binary files a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py b/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml b/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_182749-paufev36/files/config.yaml b/wandb/run-20220409_182749-paufev36/files/config.yaml
-deleted file mode 100644
-index c4a0d20..0000000
---- a/wandb/run-20220409_182749-paufev36/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_182749-paufev36/files/diff.patch b/wandb/run-20220409_182749-paufev36/files/diff.patch
-deleted file mode 100644
-index 17f6c34..0000000
---- a/wandb/run-20220409_182749-paufev36/files/diff.patch
-+++ /dev/null
-@@ -1,694 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e8bd4e3 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,362 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--+{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--+{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--+{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--+{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--+{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--+{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--+{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--+{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--+{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--+{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--+{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--+{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--+{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--+{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--+{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--+{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--+{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--+{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--+{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--+{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--+{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--+{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--+{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--+{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--+{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--+{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--+{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--+{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--+{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--+{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--+{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--+{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--+{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--+{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--+{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--+{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--+{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--+{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--+{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--+{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--+{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--+{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--+{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--+{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--+{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--+{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..6163657 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..7d0f5dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f11d588 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_182749-paufev36
--\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/output.log b/wandb/run-20220409_182749-paufev36/files/output.log
-deleted file mode 100644
-index 8a30e30..0000000
---- a/wandb/run-20220409_182749-paufev36/files/output.log
-+++ /dev/null
-@@ -1,55 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.115720272064209, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 202.97476196289062, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 151.204345703125, "time": 62}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Exception in thread Thread-16:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220409_182749-paufev36/files/requirements.txt b/wandb/run-20220409_182749-paufev36/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_182749-paufev36/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json b/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-deleted file mode 100644
-index ee6c1fa..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:57:50.039943",
--    "startedAt": "2022-04-09T12:57:49.399103",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json b/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-deleted file mode 100644
-index 6be8521..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 287.689208984375, "_runtime": 137, "_timestamp": 1649509206, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log b/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-deleted file mode 100644
-index ade12de..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-+++ /dev/null
-@@ -1,141 +0,0 @@
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,431 DEBUG   MainThread:25755 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send():179] send: header
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,435 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:49,589 DEBUG   SenderThread:25755 [sender.py:send():179] send: run
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:50,037 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():39] meta init
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():53] meta init done
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:probe():210] probe
--2022-04-09 18:27:50,045 DEBUG   HandlerThread:25755 [meta.py:_setup_git():200] setup git
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_save_code():89] save code
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_code():110] save code done
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_patches():127] save patches
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_pip():57] save pip
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_conda():78] save conda
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:probe():252] probe done
--2022-04-09 18:27:51,519 DEBUG   SenderThread:25755 [sender.py:send():179] send: files
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,530 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:51,530 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:51,872 DEBUG   SenderThread:25755 [sender.py:send():179] send: config
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:06,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:18,996 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,208 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:22,208 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:49,672 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:53,002 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:53,002 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,936 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:00,937 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:08,453 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:08,454 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:20,345 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:23,787 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:23,787 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:39,186 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:39,186 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:51,270 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:54,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:54,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:10,343 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:30:10,343 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug.log b/wandb/run-20220409_182749-paufev36/logs/debug.log
-deleted file mode 100644
-index 7b0f79c..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug.log
-+++ /dev/null
-@@ -1,92 +0,0 @@
--2022-04-09 18:27:49,403 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'paufev36', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-paufev36.yaml', 'start_method': 'thread'}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():418] starting backend
--2022-04-09 18:27:49,427 INFO    MainThread:25755 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:27:49,429 INFO    MainThread:25755 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb b/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
-deleted file mode 100644
-index 70babdb..0000000
-Binary files a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb and /dev/null differ
-diff --git a/wandb/sweep-1t9pc38r/config-paufev36.yaml b/wandb/sweep-1t9pc38r/config-paufev36.yaml
-deleted file mode 100644
-index da3e8b2..0000000
---- a/wandb/sweep-1t9pc38r/config-paufev36.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml b/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-deleted file mode 100644
-index d68afea..0000000
---- a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml b/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-deleted file mode 100644
-index cc3235e..0000000
---- a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml b/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-deleted file mode 100644
-index 24fc0f6..0000000
---- a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml b/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-deleted file mode 100644
-index eeb3936..0000000
---- a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml b/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-deleted file mode 100644
-index f88591e..0000000
---- a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-abict4v2.yaml b/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-deleted file mode 100644
-index 1b97c5e..0000000
---- a/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 20
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml b/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-deleted file mode 100644
-index 426c8ac..0000000
---- a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml b/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-deleted file mode 100644
-index caf5f78..0000000
---- a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml b/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-deleted file mode 100644
-index 6b7d3c1..0000000
---- a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml b/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-deleted file mode 100644
-index 8f11b7e..0000000
---- a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml b/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-deleted file mode 100644
-index d3a2560..0000000
---- a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml b/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-deleted file mode 100644
-index 403014d..0000000
---- a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 512
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml b/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-deleted file mode 100644
-index d1bf3d8..0000000
---- a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 40
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml b/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-deleted file mode 100644
-index 258ae0c..0000000
---- a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml b/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-deleted file mode 100644
-index dbe827a..0000000
---- a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml b/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-deleted file mode 100644
-index 3aeb285..0000000
---- a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml b/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-deleted file mode 100644
-index ccb6734..0000000
---- a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-gjih072d.yaml b/wandb/sweep-yoroy32u/config-gjih072d.yaml
-deleted file mode 100644
-index 73e8e4c..0000000
---- a/wandb/sweep-yoroy32u/config-gjih072d.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml b/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-deleted file mode 100644
-index 9d822c0..0000000
---- a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml b/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-deleted file mode 100644
-index f0bd5df..0000000
---- a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-uh7twoim.yaml b/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-deleted file mode 100644
-index 508d9e2..0000000
---- a/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml b/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-deleted file mode 100644
-index 83311a7..0000000
---- a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml b/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-deleted file mode 100644
-index 4f6dc35..0000000
---- a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--lambd:
--  value: 0.4
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-padai7jf.yaml b/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-deleted file mode 100644
-index 9b19315..0000000
---- a/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--lambd:
--  value: 0.55
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml b/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-deleted file mode 100644
-index 8a8a9b2..0000000
---- a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 256
--epochs:
--  value: 24
--lambd:
--  value: 0.2
--nhead:
--  value: 2
--nlayers:
--  value: 4
diff --git a/wandb/run-20220415_203240-1bwp8j0o/files/output.log b/wandb/run-20220415_203240-1bwp8j0o/files/output.log
deleted file mode 100644
index e69de29..0000000
diff --git a/wandb/run-20220415_203240-1bwp8j0o/files/requirements.txt b/wandb/run-20220415_203240-1bwp8j0o/files/requirements.txt
deleted file mode 100644
index 5ddce70..0000000
--- a/wandb/run-20220415_203240-1bwp8j0o/files/requirements.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-aiohttp==3.8.1
-aiosignal==1.2.0
-antlr4-python3-runtime==4.8
-async-timeout==4.0.2
-asynctest==0.13.0
-attrs==21.4.0
-backcall==0.2.0
-bitarray==2.4.1
-blessings==1.7
-brotlipy==0.7.0
-certifi==2021.10.8
-cffi==1.15.0
-charset-normalizer==2.0.12
-click==8.0.4
-colorama==0.4.4
-configparser==5.2.0
-cryptography==36.0.0
-cython==0.29.28
-datasets==1.16.1
-debugpy==1.6.0
-decorator==5.1.1
-dill==0.3.4
-docker-pycreds==0.4.0
-entrypoints==0.4
-fairseq==1.0.0a0
-fastbpe==0.1.0
-filelock==3.6.0
-frozenlist==1.3.0
-fsspec==2022.2.0
-gitdb==4.0.9
-gitpython==3.1.27
-gpustat==0.6.0
-huggingface-hub==0.4.0
-hydra-core==1.0.7
-idna==3.3
-importlib-metadata==4.11.3
-importlib-resources==5.6.0
-ipykernel==6.12.1
-ipython==7.32.0
-jedi==0.18.1
-joblib==1.1.0
-jupyter-client==7.2.2
-jupyter-core==4.9.2
-matplotlib-inline==0.1.3
-mkl-fft==1.3.1
-mkl-random==1.2.2
-mkl-service==2.4.0
-mock==4.0.3
-multidict==6.0.2
-multiprocess==0.70.12.2
-nest-asyncio==1.5.5
-numpy==1.21.5
-nvidia-ml-py3==7.352.0
-omegaconf==2.0.6
-packaging==21.3
-pandas==1.3.5
-parso==0.8.3
-pathtools==0.1.2
-pexpect==4.8.0
-pickleshare==0.7.5
-pillow==9.0.1
-pip==21.2.2
-portalocker==2.4.0
-promise==2.3
-prompt-toolkit==3.0.29
-protobuf==3.19.4
-psutil==5.9.0
-ptyprocess==0.7.0
-pyarrow==7.0.0
-pycparser==2.21
-pygments==2.11.2
-pyopenssl==22.0.0
-pyparsing==3.0.7
-pysocks==1.7.1
-python-dateutil==2.8.2
-pytz==2022.1
-pyyaml==6.0
-pyzmq==22.3.0
-regex==2022.3.15
-requests==2.27.1
-sacrebleu==2.0.0
-sacremoses==0.0.49
-sentry-sdk==1.5.8
-setuptools==58.0.4
-shortuuid==1.0.8
-six==1.16.0
-smmap==5.0.0
-subprocess32==3.5.4
-subword-nmt==0.3.8
-tabulate==0.8.9
-tokenizers==0.10.3
-torch==1.11.0
-torchaudio==0.11.0
-torchtext==0.12.0
-torchvision==0.12.0
-tornado==6.1
-tqdm==4.63.1
-traitlets==5.1.1
-transformers==4.14.1
-typing-extensions==4.1.1
-urllib3==1.26.9
-wandb==0.10.31
-wcwidth==0.2.5
-wheel==0.37.1
-xxhash==3.0.0
-yarl==1.7.2
-zipp==3.7.0
\ No newline at end of file
diff --git a/wandb/run-20220415_203240-1bwp8j0o/files/wandb-metadata.json b/wandb/run-20220415_203240-1bwp8j0o/files/wandb-metadata.json
deleted file mode 100644
index 635bb75..0000000
--- a/wandb/run-20220415_203240-1bwp8j0o/files/wandb-metadata.json
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
-    "python": "3.7.11",
-    "heartbeatAt": "2022-04-15T15:02:42.085900",
-    "startedAt": "2022-04-15T15:02:40.953964",
-    "docker": null,
-    "gpu": "NVIDIA GeForce GTX 1080 Ti",
-    "gpu_count": 2,
-    "cpu_count": 8,
-    "cuda": null,
-    "args": [],
-    "state": "running",
-    "program": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement/train_translation.py",
-    "codePath": "train_translation.py",
-    "git": {
-        "remote": "https://github.com/IvLabs/context_enhancement.git",
-        "commit": "3f7c03274d50f816db3079adcb4d4125620373b6"
-    },
-    "email": "aneeshashetye@gmail.com",
-    "root": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement",
-    "host": "hubble-02",
-    "username": "ivlabs",
-    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
-}
diff --git a/wandb/run-20220415_203240-1bwp8j0o/files/wandb-summary.json b/wandb/run-20220415_203240-1bwp8j0o/files/wandb-summary.json
deleted file mode 100644
index 9e26dfe..0000000
--- a/wandb/run-20220415_203240-1bwp8j0o/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{}
\ No newline at end of file
diff --git a/wandb/run-20220415_203240-1bwp8j0o/logs/debug-internal.log b/wandb/run-20220415_203240-1bwp8j0o/logs/debug-internal.log
deleted file mode 100644
index 6491045..0000000
--- a/wandb/run-20220415_203240-1bwp8j0o/logs/debug-internal.log
+++ /dev/null
@@ -1,56 +0,0 @@
-2022-04-15 20:32:40,986 INFO    wandb_internal:6751 [internal.py:wandb_internal():91] W&B internal server running at pid: 6751, started at: 2022-04-15 20:32:40.973711
-2022-04-15 20:32:40,989 INFO    MainThread:6751 [wandb_init.py:init():423] backend started and connected
-2022-04-15 20:32:40,989 DEBUG   MainThread:6751 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
-2022-04-15 20:32:40,991 INFO    MainThread:6751 [wandb_init.py:init():465] updated telemetry
-2022-04-15 20:32:41,002 INFO    MainThread:6751 [wandb_init.py:init():484] communicating current version
-2022-04-15 20:32:41,033 DEBUG   SenderThread:6751 [sender.py:send():179] send: header
-2022-04-15 20:32:41,033 INFO    WriterThread:6751 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/run-1bwp8j0o.wandb
-2022-04-15 20:32:41,034 DEBUG   HandlerThread:6751 [handler.py:handle_request():124] handle_request: check_version
-2022-04-15 20:32:41,034 DEBUG   SenderThread:6751 [sender.py:send_request():193] send_request: check_version
-2022-04-15 20:32:41,353 INFO    MainThread:6751 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-15 20:32:41,353 INFO    MainThread:6751 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-15 20:32:41,355 DEBUG   SenderThread:6751 [sender.py:send():179] send: run
-2022-04-15 20:32:42,068 INFO    MainThread:6751 [wandb_init.py:init():522] starting run threads in backend
-2022-04-15 20:32:42,068 DEBUG   HandlerThread:6751 [handler.py:handle_request():124] handle_request: run_start
-2022-04-15 20:32:42,085 DEBUG   HandlerThread:6751 [meta.py:__init__():39] meta init
-2022-04-15 20:32:42,085 DEBUG   HandlerThread:6751 [meta.py:__init__():53] meta init done
-2022-04-15 20:32:42,085 DEBUG   HandlerThread:6751 [meta.py:probe():210] probe
-2022-04-15 20:32:42,092 DEBUG   HandlerThread:6751 [meta.py:_setup_git():200] setup git
-2022-04-15 20:32:42,154 INFO    SenderThread:6751 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files
-2022-04-15 20:32:42,154 INFO    SenderThread:6751 [sender.py:_start_run_threads():707] run started: 1bwp8j0o with start time 1650034960
-2022-04-15 20:32:42,154 DEBUG   SenderThread:6751 [sender.py:send():179] send: summary
-2022-04-15 20:32:42,155 INFO    SenderThread:6751 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-15 20:32:42,181 DEBUG   HandlerThread:6751 [meta.py:_setup_git():207] setup git done
-2022-04-15 20:32:42,181 DEBUG   HandlerThread:6751 [meta.py:_save_code():89] save code
-2022-04-15 20:32:42,212 DEBUG   HandlerThread:6751 [meta.py:_save_code():110] save code done
-2022-04-15 20:32:42,212 DEBUG   HandlerThread:6751 [meta.py:_save_patches():127] save patches
-2022-04-15 20:32:42,390 DEBUG   HandlerThread:6751 [meta.py:_save_patches():169] save patches done
-2022-04-15 20:32:42,390 DEBUG   HandlerThread:6751 [meta.py:_save_pip():57] save pip
-2022-04-15 20:32:42,391 DEBUG   HandlerThread:6751 [meta.py:_save_pip():71] save pip done
-2022-04-15 20:32:42,391 DEBUG   HandlerThread:6751 [meta.py:_save_conda():78] save conda
-2022-04-15 20:32:43,129 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/requirements.txt
-2022-04-15 20:32:43,129 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/wandb-summary.json
-2022-04-15 20:32:43,130 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/code/train_translation.py
-2022-04-15 20:32:43,130 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/conda-environment.yaml
-2022-04-15 20:32:43,130 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/diff.patch
-2022-04-15 20:32:43,130 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/code
-2022-04-15 20:32:47,042 DEBUG   HandlerThread:6751 [meta.py:_save_conda():86] save conda done
-2022-04-15 20:32:47,042 DEBUG   HandlerThread:6751 [meta.py:probe():252] probe done
-2022-04-15 20:32:47,048 DEBUG   SenderThread:6751 [sender.py:send():179] send: files
-2022-04-15 20:32:47,048 INFO    SenderThread:6751 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-15 20:32:47,048 INFO    SenderThread:6751 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-15 20:32:47,048 INFO    SenderThread:6751 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-15 20:32:47,070 DEBUG   HandlerThread:6751 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 20:32:47,070 DEBUG   SenderThread:6751 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 20:32:47,081 INFO    MainThread:6751 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-15 20:32:47,082 INFO    MainThread:6751 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-15 20:32:47,082 INFO    MainThread:6751 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-15 20:32:47,128 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/conda-environment.yaml
-2022-04-15 20:32:47,128 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/output.log
-2022-04-15 20:32:47,128 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/wandb-metadata.json
-2022-04-15 20:32:47,137 INFO    MainThread:6751 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-15 20:32:47,137 INFO    MainThread:6751 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-15 20:32:47,138 INFO    MainThread:6751 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-15 20:32:47,644 DEBUG   SenderThread:6751 [sender.py:send():179] send: config
-2022-04-15 20:32:48,685 INFO    Thread-14 :6751 [upload_job.py:push():133] Uploaded file /tmp/tmpfzph_9yfwandb/a1uf7dt2-wandb-metadata.json
diff --git a/wandb/run-20220415_203240-1bwp8j0o/logs/debug.log b/wandb/run-20220415_203240-1bwp8j0o/logs/debug.log
deleted file mode 100644
index e0e86ab..0000000
--- a/wandb/run-20220415_203240-1bwp8j0o/logs/debug.log
+++ /dev/null
@@ -1,41 +0,0 @@
-2022-04-15 20:32:40,955 INFO    MainThread:6751 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
-2022-04-15 20:32:40,955 INFO    MainThread:6751 [wandb_setup.py:_flush():69] setting login settings: {}
-2022-04-15 20:32:40,955 INFO    MainThread:6751 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/logs/debug.log
-2022-04-15 20:32:40,955 INFO    MainThread:6751 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/logs/debug-internal.log
-2022-04-15 20:32:40,955 INFO    MainThread:6751 [wandb_init.py:init():369] calling init triggers
-2022-04-15 20:32:40,955 INFO    MainThread:6751 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
-config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-15 20:32:40,955 INFO    MainThread:6751 [wandb_init.py:init():418] starting backend
-2022-04-15 20:32:40,963 INFO    MainThread:6751 [backend.py:ensure_launched():132] starting backend process...
-2022-04-15 20:32:40,973 INFO    MainThread:6751 [backend.py:ensure_launched():137] started backend process with pid: 0
-2022-04-15 20:32:40,986 INFO    wandb_internal:6751 [internal.py:wandb_internal():91] W&B internal server running at pid: 6751, started at: 2022-04-15 20:32:40.973711
-2022-04-15 20:32:40,989 INFO    MainThread:6751 [wandb_init.py:init():423] backend started and connected
-2022-04-15 20:32:40,991 INFO    MainThread:6751 [wandb_init.py:init():465] updated telemetry
-2022-04-15 20:32:41,002 INFO    MainThread:6751 [wandb_init.py:init():484] communicating current version
-2022-04-15 20:32:41,033 INFO    WriterThread:6751 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/run-1bwp8j0o.wandb
-2022-04-15 20:32:41,353 INFO    MainThread:6751 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-15 20:32:41,353 INFO    MainThread:6751 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-15 20:32:42,068 INFO    MainThread:6751 [wandb_init.py:init():522] starting run threads in backend
-2022-04-15 20:32:42,154 INFO    SenderThread:6751 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files
-2022-04-15 20:32:42,154 INFO    SenderThread:6751 [sender.py:_start_run_threads():707] run started: 1bwp8j0o with start time 1650034960
-2022-04-15 20:32:42,155 INFO    SenderThread:6751 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-15 20:32:43,129 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/requirements.txt
-2022-04-15 20:32:43,129 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/wandb-summary.json
-2022-04-15 20:32:43,130 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/code/train_translation.py
-2022-04-15 20:32:43,130 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/conda-environment.yaml
-2022-04-15 20:32:43,130 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/diff.patch
-2022-04-15 20:32:43,130 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/code
-2022-04-15 20:32:47,048 INFO    SenderThread:6751 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-15 20:32:47,048 INFO    SenderThread:6751 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-15 20:32:47,048 INFO    SenderThread:6751 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-15 20:32:47,081 INFO    MainThread:6751 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-15 20:32:47,082 INFO    MainThread:6751 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-15 20:32:47,082 INFO    MainThread:6751 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-15 20:32:47,128 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/conda-environment.yaml
-2022-04-15 20:32:47,128 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/output.log
-2022-04-15 20:32:47,128 INFO    Thread-12 :6751 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203240-1bwp8j0o/files/wandb-metadata.json
-2022-04-15 20:32:47,137 INFO    MainThread:6751 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-15 20:32:47,137 INFO    MainThread:6751 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-15 20:32:47,138 INFO    MainThread:6751 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-15 20:32:48,685 INFO    Thread-14 :6751 [upload_job.py:push():133] Uploaded file /tmp/tmpfzph_9yfwandb/a1uf7dt2-wandb-metadata.json
diff --git a/wandb/run-20220415_203240-1bwp8j0o/run-1bwp8j0o.wandb b/wandb/run-20220415_203240-1bwp8j0o/run-1bwp8j0o.wandb
deleted file mode 100644
index e69de29..0000000
diff --git a/wandb/run-20220415_203417-2injabwk/files/code/train_translation.py b/wandb/run-20220415_203417-2injabwk/files/code/train_translation.py
deleted file mode 100644
index a7a253c..0000000
--- a/wandb/run-20220415_203417-2injabwk/files/code/train_translation.py
+++ /dev/null
@@ -1,401 +0,0 @@
-import numpy as np
-from pathlib import Path
-import argparse
-import json
-import math
-import os
-import random
-import signal
-import subprocess
-import sys
-import time
-
-import torch
-from torch import nn, optim 
-from torch.nn import Transformer 
-import torchtext
-import t_dataset
-from t_dataset import  Translation_dataset_t
-from t_dataset import  MyCollate
-import translation_utils 
-from translation_utils import TokenEmbedding, PositionalEncoding 
-from translation_utils import create_mask
-from transformers import BertModel 
-from transformers import AutoTokenizer
-from torch import Tensor
-from torchtext.data.metrics import bleu_score
-from models import Translator
-from models import BarlowTwins
-
-import wandb 
-
-
-#import barlow
-os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
-os.environ['WANDB_START_METHOD'] = 'thread'
-os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-
-MANUAL_SEED = 4444
-
-random.seed(MANUAL_SEED)
-np.random.seed(MANUAL_SEED)
-torch.manual_seed(MANUAL_SEED)
-torch.backends.cudnn.deterministic = True
-
-
-parser = argparse.ArgumentParser(description = 'Translation') 
-
-# Training hyper-parameters: 
-parser.add_argument('--workers', default=4, type=int, metavar='N', 
-                    help='number of data loader workers') 
-parser.add_argument('--epochs', default=5, type=int, metavar='N',
-                    help='number of total epochs to run')
-parser.add_argument('--batch_size', default=4, type=int, metavar='n',
-                    help='mini-batch size')
-parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
-                    help='base learning rate')
-parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
-                    help='dropout for training translation transformer')
-parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
-                    help='weight decay')
-parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
-                    help='momentum for sgd')
-parser.add_argument('--clip', default=1, type=float, metavar='GC',
-                    help='Gradient Clipping')
-parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
-                    help='betas for Adam Optimizer')
-parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
-                    help='eps for Adam optimizer')
-parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
-                    help='loss function for translation')
-parser.add_argument('--optimizer', default='adam', type=str, metavar='OP',
-                    help='selecting optimizer')
-
-# Transformer parameters: 
-parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
-                    help='dimension of transformer encoder')
-parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                    help= 'number of heads in transformer') 
-parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                    help= 'dimension of feedforward layer in transformer encoder') 
-parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                   help='number of layers of transformer encoder') 
-parser.add_argument('--projector', default='768-256', type=str,
-                    metavar='MLP', help='projector MLP')
-
-# Tokenizer: 
-parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
-                metavar='T', help= 'tokenizer')
-parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
-                    help='Dimension of mbert output')
-# Paths: 
-parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
-                    metavar='DIR', help='path to checkpoint directory')
-
-# to load or barlow or not: 
-parser.add_argument('--load', default=0, type=int,
-                    metavar='DIR', help='to load barlow twins encoder or not')
-
-# calculate bleu: 
-parser.add_argument('--checkbleu', default=5 , type=int,
-                    metavar='BL', help='check bleu after these number of epochs')
-# train or test dataset
-parser.add_argument('--train', default=True , type=bool,
-                    metavar='T', help='selecting train set')
-
-parser.add_argument('--print_freq', default=5 , type=int,
-                    metavar='PF', help='frequency of printing and saving stats')
-
-parser.add_argument('--test_translation', default=0, type=int, 
-                    metavar='TT', help='testing translation_score')
-''' NOTE: 
-        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-'''
-
-args = parser.parse_args()
-# print(args.load)
-os.environ["TOKENIZERS_PARALLELISM"] = "true"
-
-def main(): 
-
-    # print("entered main")
-    args.ngpus_per_node = torch.cuda.device_count()
-    if 'SLURM_JOB_ID' in os.environ:
-        # single-node and multi-node distributed training on SLURM cluster
-        # requeue job on SLURM preemption
-        signal.signal(signal.SIGUSR1, handle_sigusr1)
-        signal.signal(signal.SIGTERM, handle_sigterm)
-        # find a common host name on all nodes
-        # assume scontrol returns hosts in the same order on all nodes
-        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
-        stdout = subprocess.check_output(cmd.split())
-        host_name = stdout.decode().splitlines()[0]
-        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
-        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
-        args.dist_url = f'tcp://{host_name}:58472'
-    else:
-        # single-node distributed training
-        args.rank = 0
-        args.dist_url = 'tcp://localhost:58472'
-        args.world_size = args.ngpus_per_node
-    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
-
-
-def main_worker(gpu, args):
-    
-    args.rank += gpu
-    torch.distributed.init_process_group(
-        backend='nccl', init_method=args.dist_url,
-        world_size=args.world_size, rank=args.rank)
-
-    if args.rank == 0:
-
-        wandb.init(config=args, project='translation_test')#############################################
-        wandb.config.update(args)
-        config = wandb.config
-    
-        # exit()
-        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
-        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
-        print(' '.join(sys.argv))
-        print(' '.join(sys.argv), file=stats_file)
-
-    torch.cuda.set_device(gpu)
-    torch.backends.cudnn.benchmark = True
-
-    dataset = Translation_dataset_t(train=args.train) 
-    src_vocab_size = dataset.de_vocab_size
-    trg_vocab_size = dataset.en_vocab_size
-    tokenizer = dataset.tokenizer  
-    pad_idx = tokenizer.pad_token_id
-    sos_idx = tokenizer.cls_token_id 
-    eos_idx = tokenizer.sep_token_id
-
-#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
-    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
-    # print(src_vocab_size, trg_vocab_size)
-    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
-    transformer = Transformer(d_model=args.dmodel, 
-                              nhead=args.nhead, 
-                              num_encoder_layers=args.nlayers, 
-                              num_decoder_layers = args.nlayers, 
-                              dim_feedforward=args.dfeedforward, 
-                              dropout=args.dropout)
-    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
-    # print(model.state_dict)
-#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
-
-    # args.load = False
-
-    if args.load == 1: 
-        # print(args.load)
-        # print('inside')
-        print('loading barlow model')
-        t_enc = model.transformer.encoder
-        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
-        ### note: lambd is just a placeholder
-        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
-                            map_location='cpu')
-        barlow.load_state_dict(ckpt['model'])
-        model.transformer.encoder = barlow.transformer_enc        
-        model.mbert = barlow.mbert
-    '''
-    to_do: 
-    if post_train: 
-        torch.load(model.states_dict)
-        model.transformer.encoder = model_barlow
-
-    '''
-#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
-
-    param_weights = []
-    param_biases = []
-    for param in model.parameters():
-        if param.ndim == 1:
-            param_biases.append(param)
-        else:
-            param_weights.append(param)
-    parameters = [{'params': param_weights}, {'params': param_biases}]
-    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
-
-###########################################################
-    if args.optimizer == 'adam':
-        optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
-    else: 
-        optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) 
-    
-    if args.loss_fn == 'cross_entropy': 
-        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
-##############################################################
-
-    start_epoch = 0 
-
-    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
-
-    assert args.batch_size % args.world_size == 0
-    per_device_batch_size = args.batch_size // args.world_size
-    id2bert_dict = dataset.id2bert_dict
-    ###############################
-    loader = torch.utils.data.DataLoader(
-         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-   
-    test_loader = torch.utils.data.DataLoader(
-         dataset, batch_size=1, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-    #############################
-    start_time = time.time()
-
-
-    if not args.test_translation: 
-
-        for epoch in range(start_epoch, args.epochs):
-            sampler.set_epoch(epoch)
-            epoch_loss = 0 
-            t = 0 
-            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
-                src = sent[0].cuda(gpu, non_blocking=True)
-                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
-                tgt_out = sent[3].cuda(gpu, non_blocking=True)
-                
-                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
-                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
-                
-                optimizer.zero_grad()
-
-                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
-                loss.backward()
-
-                optimizer.step()
-                # losses += loss.item()
-                
-                # wandb.log({'iter_loss': loss})
-                epoch_loss += loss.item()
-                t += 1 
-                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-                
-                if step % args.print_freq == 0:
-                    if args.rank == 0:
-                        stats = dict(epoch=epoch, step=step,
-                                    loss=loss.item(),
-                                    time=int(time.time() - start_time))
-                        print(json.dumps(stats))
-                        print(json.dumps(stats), file=stats_file)
-            if args.rank == 0:
-
-                wandb.log({"epoch_loss":epoch_loss/t})
-                # save checkpoint
-                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
-                            optimizer=optimizer.state_dict())
-                # print(model.state_dict)
-                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
-                print('translation model saved in', args.checkpoint_dir)
-            
-    ##############################################################
-            if args.rank == 0: 
-                if epoch%args.checkbleu ==0 : 
-
-                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                    wandb.log({'bleu_score': bleu_score}) 
-    #            print(bleu_score(predicted, target))
-    ##############################################################
-    #        if epoch%1 ==0 : 
-    #            torch.save(model.module.state_dict(),
-    #                   'path.pth')
-    #            print("Model is saved")
-            # if args.rank == 0:
-            #     # save checkpoint
-            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
-            #                  optimizer=optimizer.state_dict())
-            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
-            #     print('saved translation model in', args.checkpoint_dir)
-        wandb.finish()
-            
-    else: 
-
-        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-        print('test_bleu_score', bleu_score)
-        if args.rank == 0: 
-            wandb.log({'bleu_score': bleu_score})
-
-
-def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
-
-    model.eval()
-    predicted=[]
-    target=[]
-            
-    for i in test_loader: 
-        src = i[0].cuda(gpu, non_blocking=True)
-#        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-        tgt_out = i[3].cuda(gpu, non_blocking=True)
-        num_tokens = src.shape[0]
-
-        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
-        out = translate(model, src, tokenizer, src_mask, id2bert, gpu)
-        predicted.append(out)
-        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-        print(out)
-        print(tokenizer.convert_ids_to_tokens(tgt_out))
-                
-        try: 
-            bleu_score(predicted, target)
-        except: 
-            predicted.pop()
-            target.pop()
-            
-        bleu = bleu_score(predicted, target)
-
-    return bleu
-
-'''
-todo: 
-    BLEU score
-'''
-
-# function to generate output sequence using greedy algorithm 
-def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
-    src = src
-    src_mask = src_mask
-
-    memory = model.module.encode(src, src_mask)
-    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
-    for i in range(max_len-1):
-        memory = memory
-        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
-                    .type(torch.bool)).cuda(gpu, non_blocking=True)
-        out = model.module.decode(ys, memory, tgt_mask)
-        out = out.transpose(0, 1)
-        prob = model.module.generator(out[:, -1])
-        _, next_word = torch.max(prob, dim=1)
-        next_word = next_word.item()
-
-        ys = torch.cat([ys,
-                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
-        if next_word == eos_idx:
-            break
-    return ys
-
-
-# actual function to translate input sentence into target language
-def translate(model: torch.nn.Module, 
-        src: torch.tensor, 
-        tokenizer,src_mask, id2bert, gpu):
-    model.eval()
-    
-    num_tokens = src.shape[0]
-    
-    
-    tgt_tokens = greedy_decode(
-        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-    
-#    for i in len(tgt_tokens): 
-#        tgt_tokens[i] = id2bert[tgt_tokens[i]]
-#    print(tgt_tokens)
-
-    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
-
-
-if __name__ == '__main__': 
-    main()
-    wandb.finish()
diff --git a/wandb/run-20220415_203417-2injabwk/files/conda-environment.yaml b/wandb/run-20220415_203417-2injabwk/files/conda-environment.yaml
deleted file mode 100644
index fd74d2b..0000000
--- a/wandb/run-20220415_203417-2injabwk/files/conda-environment.yaml
+++ /dev/null
@@ -1,158 +0,0 @@
-name: ectc
-channels:
-  - pytorch
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=4.5=1_gnu
-  - blas=1.0=mkl
-  - brotlipy=0.7.0=py37h27cfd23_1003
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2022.3.18=h06a4308_0
-  - certifi=2021.10.8=py37h06a4308_2
-  - cffi=1.15.0=py37hd667e15_1
-  - cryptography=36.0.0=py37h9ce1e76_0
-  - cudatoolkit=11.3.1=h2bc3f7f_2
-  - ffmpeg=4.3=hf484d3e_0
-  - freetype=2.11.0=h70c0345_0
-  - giflib=5.2.1=h7b6447c_0
-  - gmp=6.2.1=h2531618_2
-  - gnutls=3.6.15=he1e5248_0
-  - idna=3.3=pyhd3eb1b0_0
-  - intel-openmp=2021.4.0=h06a4308_3561
-  - jpeg=9d=h7f8727e_0
-  - lame=3.100=h7b6447c_0
-  - lcms2=2.12=h3be6417_0
-  - ld_impl_linux-64=2.35.1=h7274673_9
-  - libffi=3.3=he6710b0_2
-  - libgcc-ng=9.3.0=h5101ec6_17
-  - libgomp=9.3.0=h5101ec6_17
-  - libiconv=1.15=h63c8f33_5
-  - libidn2=2.3.2=h7f8727e_0
-  - libpng=1.6.37=hbc83047_0
-  - libstdcxx-ng=9.3.0=hd4cf53a_17
-  - libtasn1=4.16.0=h27cfd23_0
-  - libtiff=4.2.0=h85742a9_0
-  - libunistring=0.9.10=h27cfd23_0
-  - libuv=1.40.0=h7b6447c_0
-  - libwebp=1.2.2=h55f646e_0
-  - libwebp-base=1.2.2=h7f8727e_0
-  - lz4-c=1.9.3=h295c915_1
-  - mkl=2021.4.0=h06a4308_640
-  - mkl-service=2.4.0=py37h7f8727e_0
-  - mkl_fft=1.3.1=py37hd3c417c_0
-  - mkl_random=1.2.2=py37h51133e4_0
-  - ncurses=6.3=h7f8727e_2
-  - nettle=3.7.3=hbbd107a_1
-  - numpy-base=1.21.2=py37h79a1101_0
-  - openh264=2.1.1=h4ff587b_0
-  - openssl=1.1.1n=h7f8727e_0
-  - pip=21.2.2=py37h06a4308_0
-  - pycparser=2.21=pyhd3eb1b0_0
-  - pyopenssl=22.0.0=pyhd3eb1b0_0
-  - pysocks=1.7.1=py37_1
-  - python=3.7.11=h12debd9_0
-  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
-  - pytorch-mutex=1.0=cuda
-  - readline=8.1.2=h7f8727e_1
-  - requests=2.27.1=pyhd3eb1b0_0
-  - setuptools=58.0.4=py37h06a4308_0
-  - six=1.16.0=pyhd3eb1b0_1
-  - sqlite=3.38.0=hc218d9a_0
-  - tk=8.6.11=h1ccaba5_0
-  - torchaudio=0.11.0=py37_cu113
-  - typing_extensions=4.1.1=pyh06a4308_0
-  - wheel=0.37.1=pyhd3eb1b0_0
-  - xz=5.2.5=h7b6447c_0
-  - zlib=1.2.11=h7f8727e_4
-  - zstd=1.4.9=haebb681_0
-  - pip:
-    - aiohttp==3.8.1
-    - aiosignal==1.2.0
-    - antlr4-python3-runtime==4.8
-    - async-timeout==4.0.2
-    - asynctest==0.13.0
-    - attrs==21.4.0
-    - backcall==0.2.0
-    - bitarray==2.4.1
-    - blessings==1.7
-    - charset-normalizer==2.0.12
-    - click==8.0.4
-    - colorama==0.4.4
-    - configparser==5.2.0
-    - cython==0.29.28
-    - datasets==1.16.1
-    - debugpy==1.6.0
-    - decorator==5.1.1
-    - dill==0.3.4
-    - docker-pycreds==0.4.0
-    - entrypoints==0.4
-    - fastbpe==0.1.0
-    - filelock==3.6.0
-    - frozenlist==1.3.0
-    - fsspec==2022.2.0
-    - gitdb==4.0.9
-    - gitpython==3.1.27
-    - gpustat==0.6.0
-    - huggingface-hub==0.4.0
-    - hydra-core==1.0.7
-    - importlib-metadata==4.11.3
-    - importlib-resources==5.6.0
-    - ipykernel==6.12.1
-    - ipython==7.32.0
-    - jedi==0.18.1
-    - joblib==1.1.0
-    - jupyter-client==7.2.2
-    - jupyter-core==4.9.2
-    - matplotlib-inline==0.1.3
-    - mock==4.0.3
-    - multidict==6.0.2
-    - multiprocess==0.70.12.2
-    - nest-asyncio==1.5.5
-    - numpy==1.21.5
-    - nvidia-ml-py3==7.352.0
-    - omegaconf==2.0.6
-    - packaging==21.3
-    - pandas==1.3.5
-    - parso==0.8.3
-    - pathtools==0.1.2
-    - pexpect==4.8.0
-    - pickleshare==0.7.5
-    - pillow==9.0.1
-    - portalocker==2.4.0
-    - promise==2.3
-    - prompt-toolkit==3.0.29
-    - protobuf==3.19.4
-    - psutil==5.9.0
-    - ptyprocess==0.7.0
-    - pyarrow==7.0.0
-    - pygments==2.11.2
-    - pyparsing==3.0.7
-    - python-dateutil==2.8.2
-    - pytz==2022.1
-    - pyyaml==6.0
-    - pyzmq==22.3.0
-    - regex==2022.3.15
-    - sacrebleu==2.0.0
-    - sacremoses==0.0.49
-    - sentry-sdk==1.5.8
-    - shortuuid==1.0.8
-    - smmap==5.0.0
-    - subprocess32==3.5.4
-    - subword-nmt==0.3.8
-    - tabulate==0.8.9
-    - tokenizers==0.10.3
-    - torch==1.11.0
-    - torchtext==0.12.0
-    - torchvision==0.9.1
-    - tornado==6.1
-    - tqdm==4.63.1
-    - traitlets==5.1.1
-    - transformers==4.14.1
-    - urllib3==1.26.9
-    - wandb==0.10.31
-    - wcwidth==0.2.5
-    - xxhash==3.0.0
-    - yarl==1.7.2
-    - zipp==3.7.0
-prefix: /home/ivlabs/miniconda3/envs/ectc
diff --git a/wandb/run-20220415_203417-2injabwk/files/config.yaml b/wandb/run-20220415_203417-2injabwk/files/config.yaml
deleted file mode 100644
index b88038a..0000000
--- a/wandb/run-20220415_203417-2injabwk/files/config.yaml
+++ /dev/null
@@ -1,110 +0,0 @@
-wandb_version: 1
-
-_wandb:
-  desc: null
-  value:
-    cli_version: 0.10.31
-    code_path: code/train_translation.py
-    framework: huggingface
-    huggingface_version: 4.14.1
-    is_jupyter_run: false
-    is_kaggle_kernel: false
-    python_version: 3.7.11
-    t:
-      1:
-      - 1
-      - 11
-      4: 3.7.11
-      5: 0.10.31
-      6: 4.14.1
-      8:
-      - 8
-batch_size:
-  desc: null
-  value: 4
-betas:
-  desc: null
-  value:
-  - 0.9
-  - 0.98
-checkbleu:
-  desc: null
-  value: 5
-checkpoint_dir:
-  desc: null
-  value: checkpoint
-clip:
-  desc: null
-  value: 1
-dfeedforward:
-  desc: null
-  value: 200
-dist_url:
-  desc: null
-  value: tcp://localhost:58472
-dmodel:
-  desc: null
-  value: 768
-dropout:
-  desc: null
-  value: 0.01
-epochs:
-  desc: null
-  value: 5
-eps:
-  desc: null
-  value: 1.0e-09
-learning_rate:
-  desc: null
-  value: 0.2
-load:
-  desc: null
-  value: 0
-loss_fn:
-  desc: null
-  value: cross_entropy
-mbert_out_size:
-  desc: null
-  value: 768
-momentum:
-  desc: null
-  value: 0.9
-ngpus_per_node:
-  desc: null
-  value: 1
-nhead:
-  desc: null
-  value: 4
-nlayers:
-  desc: null
-  value: 3
-optimizer:
-  desc: null
-  value: adam
-print_freq:
-  desc: null
-  value: 5
-projector:
-  desc: null
-  value: 768-256
-rank:
-  desc: null
-  value: 0
-test_translation:
-  desc: null
-  value: 0
-tokenizer:
-  desc: null
-  value: bert-base-multilingual-uncased
-train:
-  desc: null
-  value: true
-weight_decay:
-  desc: null
-  value: 1.0e-06
-workers:
-  desc: null
-  value: 4
-world_size:
-  desc: null
-  value: 1
diff --git a/wandb/run-20220415_203417-2injabwk/files/diff.patch b/wandb/run-20220415_203417-2injabwk/files/diff.patch
deleted file mode 100644
index aba1e36..0000000
--- a/wandb/run-20220415_203417-2injabwk/files/diff.patch
+++ /dev/null
@@ -1,30656 +0,0 @@
-diff --git a/__pycache__/barlow_utils.cpython-37.pyc b/__pycache__/barlow_utils.cpython-37.pyc
-index 3c0d4fe..b13b62f 100644
-Binary files a/__pycache__/barlow_utils.cpython-37.pyc and b/__pycache__/barlow_utils.cpython-37.pyc differ
-diff --git a/__pycache__/models.cpython-37.pyc b/__pycache__/models.cpython-37.pyc
-index 3bbb9de..acc1737 100644
-Binary files a/__pycache__/models.cpython-37.pyc and b/__pycache__/models.cpython-37.pyc differ
-diff --git a/__pycache__/t_dataset.cpython-37.pyc b/__pycache__/t_dataset.cpython-37.pyc
-index 2650733..c4b566b 100644
-Binary files a/__pycache__/t_dataset.cpython-37.pyc and b/__pycache__/t_dataset.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-37.pyc b/__pycache__/translation_utils.cpython-37.pyc
-index 60c9eda..12c22a5 100644
-Binary files a/__pycache__/translation_utils.cpython-37.pyc and b/__pycache__/translation_utils.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-38.pyc b/__pycache__/translation_utils.cpython-38.pyc
-index 061d0e7..a1e7877 100644
-Binary files a/__pycache__/translation_utils.cpython-38.pyc and b/__pycache__/translation_utils.cpython-38.pyc differ
-diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
-index 884dd9c..78b8901 100644
---- a/checkpoint/stats.txt
-+++ b/checkpoint/stats.txt
-@@ -833,3 +833,72 @@ train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 -
- {"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
- {"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
- {"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 4}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 5}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 5}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 6}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 7}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 7}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 8}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 8}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 9}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 8}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 65}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 178}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 15}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 72}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 128}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 183}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 239}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 295}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 351}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 407}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 463}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 19}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 104}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 188}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 355}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 606}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 690}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.121065616607666, "time": 9}
-+{"epoch": 0, "step": 5, "loss": 97.44178771972656, "time": 10}
-+{"epoch": 0, "step": 10, "loss": 168.33328247070312, "time": 12}
-+{"epoch": 0, "step": 15, "loss": 133.17933654785156, "time": 12}
-+{"epoch": 0, "step": 20, "loss": 112.3768539428711, "time": 13}
-+{"epoch": 0, "step": 25, "loss": 120.29653930664062, "time": 14}
-+{"epoch": 0, "step": 30, "loss": 119.97941589355469, "time": 15}
-+{"epoch": 0, "step": 35, "loss": 86.40515899658203, "time": 16}
-+{"epoch": 0, "step": 40, "loss": 70.5906982421875, "time": 17}
-+train_translation.py
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 28}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 155}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 281}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 405}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 530}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 657}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 783}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 908}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 1033}
-+train_translation.py
-diff --git a/t_dataset.py b/t_dataset.py
-index c7ab181..53d5caa 100644
---- a/t_dataset.py
-+++ b/t_dataset.py
-@@ -20,19 +20,19 @@ class Translation_dataset_t(Dataset):
-             split = "train" 
-         else: 
-             split = "test" 
--        self.dataset = load_dataset('wmt14', "de-en", split=split) 
-+        self.dataset = load_dataset('opus_rf', "de-en", split=split) 
-         self.de_list = []
-         self.en_list = []
- #        self.tokenizer = tokenizer
-         self.tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-uncased')
--        dataset = load_dataset('opus_rf', 'de-en', split='train')
-         en_list_2 = []
--        for n, i in enumerate(dataset): 
-+        for n, i in enumerate(self.dataset): 
-             en_list_2.append(i['translation']['en'].lower())
- 
-         a1 = list(self.tokenizer(en_list_2, padding=True, return_tensors='pt')['input_ids'])
-         self.en_vocab, self.en_vocab_size = vocab(a1)
-         self.bert2id_dict = translation_utils.bert2id(self.en_vocab)
-+        self.id2bert_dict = translation_utils.id2bert(self.en_vocab)
-         
-         for i in self.dataset: 
-             self.de_list.append(self.tokenizer(i['translation']['de'].lower(), 
-diff --git a/train_translation.py b/train_translation.py
-index eea074a..a7a253c 100644
---- a/train_translation.py
-+++ b/train_translation.py
-@@ -33,6 +33,7 @@ import wandb
- #import barlow
- os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
- os.environ['WANDB_START_METHOD'] = 'thread'
-+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
- 
- MANUAL_SEED = 4444
- 
-@@ -75,9 +76,9 @@ parser.add_argument('--dmodel', default=768, type=int, metavar='T',
-                     help='dimension of transformer encoder')
- parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                     help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=500, type=int, metavar='F', 
-+parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                     help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=8, type=int, metavar= 'N', 
-+parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                    help='number of layers of transformer encoder') 
- parser.add_argument('--projector', default='768-256', type=str,
-                     metavar='MLP', help='projector MLP')
-@@ -233,6 +234,7 @@ def main_worker(gpu, args):
- 
-     assert args.batch_size % args.world_size == 0
-     per_device_batch_size = args.batch_size // args.world_size
-+    id2bert_dict = dataset.id2bert_dict
-     ###############################
-     loader = torch.utils.data.DataLoader(
-          dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-@@ -293,7 +295,7 @@ def main_worker(gpu, args):
-             if args.rank == 0: 
-                 if epoch%args.checkbleu ==0 : 
- 
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
-+                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                     wandb.log({'bleu_score': bleu_score}) 
-     #            print(bleu_score(predicted, target))
-     ##############################################################
-@@ -311,13 +313,13 @@ def main_worker(gpu, args):
-             
-     else: 
- 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
-+        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-         print('test_bleu_score', bleu_score)
-         if args.rank == 0: 
-             wandb.log({'bleu_score': bleu_score})
- 
- 
--def checkbleu(model, tokenizer, test_loader, gpu): 
-+def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
- 
-     model.eval()
-     predicted=[]
-@@ -325,13 +327,16 @@ def checkbleu(model, tokenizer, test_loader, gpu):
-             
-     for i in test_loader: 
-         src = i[0].cuda(gpu, non_blocking=True)
-+#        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-         tgt_out = i[3].cuda(gpu, non_blocking=True)
-         num_tokens = src.shape[0]
- 
-         src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
-+        out = translate(model, src, tokenizer, src_mask, id2bert, gpu)
-         predicted.append(out)
-         target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-+        print(out)
-+        print(tokenizer.convert_ids_to_tokens(tgt_out))
-                 
-         try: 
-             bleu_score(predicted, target)
-@@ -375,7 +380,7 @@ def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
- # actual function to translate input sentence into target language
- def translate(model: torch.nn.Module, 
-         src: torch.tensor, 
--        tokenizer,src_mask, gpu):
-+        tokenizer,src_mask, id2bert, gpu):
-     model.eval()
-     
-     num_tokens = src.shape[0]
-@@ -383,6 +388,11 @@ def translate(model: torch.nn.Module,
-     
-     tgt_tokens = greedy_decode(
-         model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-+    
-+#    for i in len(tgt_tokens): 
-+#        tgt_tokens[i] = id2bert[tgt_tokens[i]]
-+#    print(tgt_tokens)
-+
-     return tokenizer.convert_ids_to_tokens(tgt_tokens) 
- 
- 
-diff --git a/translation_dataset.py b/translation_dataset.py
-index 274c2f3..82270c6 100644
---- a/translation_dataset.py
-+++ b/translation_dataset.py
-@@ -11,7 +11,7 @@ class Translation_dataset(Dataset):
-     
-     def __init__(self):
-       
--        self.dataset = load_dataset('wmt14', "de-en", split="train") 
-+        self.dataset = load_dataset('opus_rf', "de-en", split="train") 
-         self.de_list = []
-         self.en_list = []
- 
-diff --git a/translation_utils.py b/translation_utils.py
-index 6c66f53..4b3b830 100644
---- a/translation_utils.py
-+++ b/translation_utils.py
-@@ -31,6 +31,13 @@ def bert2id(de_list: set):
-     
-     return label_dict
- 
-+def id2bert(de_list: set): 
-+    label_dict = {}
-+    for n, i in enumerate(de_list): 
-+        label_dict[n] = i
-+    
-+    return label_dict
-+
- def generate_square_subsequent_mask(sz):
-     mask = (torch.triu(torch.ones((sz, sz))) == 1).transpose(0, 1)
-     mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
-@@ -81,10 +88,10 @@ class TokenEmbedding(nn.Module):
-         super(TokenEmbedding, self).__init__()
-         # self.embedding = nn.Embedding(vocab_size, emb_size)
-         self.embedding = mbert
--#         for param in self.embedding.parameters():
--#             param.requires_grad = False
--#         for param in self.embedding.pooler.parameters():
--#             param.requires_grad = True
-+        for param in self.embedding.parameters():
-+            param.requires_grad = False
-+        for param in self.embedding.pooler.parameters():
-+            param.requires_grad = True
-         self.emb_size = emb_size
- 
-     def forward(self, tokens: torch.tensor):
-diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
-index 6163657..addd4fa 120000
---- a/wandb/debug-internal.log
-+++ b/wandb/debug-internal.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug-internal.log
-\ No newline at end of file
-+run-20220415_203417-2injabwk/logs/debug-internal.log
-\ No newline at end of file
-diff --git a/wandb/debug.log b/wandb/debug.log
-index 7d0f5dd..b839e8d 120000
---- a/wandb/debug.log
-+++ b/wandb/debug.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug.log
-\ No newline at end of file
-+run-20220415_203417-2injabwk/logs/debug.log
-\ No newline at end of file
-diff --git a/wandb/latest-run b/wandb/latest-run
-index f11d588..86c21fa 120000
---- a/wandb/latest-run
-+++ b/wandb/latest-run
-@@ -1 +1 @@
--run-20220409_182749-paufev36
-\ No newline at end of file
-+run-20220415_203417-2injabwk
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py b/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-deleted file mode 100644
-index 9236ace..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-+++ /dev/null
-@@ -1,350 +0,0 @@
--# Copyright (c) Facebook, Inc. and its affiliates.
--# All rights reserved.
--#
--# This source code is licensed under the license found in the
--# LICENSE file in the root directory of this source tree.
--
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--from translation_dataset import Translation_dataset
--from translation_dataset import MyCollate
--from transformers import BertModel
--from transformers import AutoTokenizer
--from torch import nn, optim
--import torch
--from t_dataset import Translation_dataset_t
--from torch.nn import Transformer
--from models import BarlowTwins
--from models import Translator
--from barlow_utils import off_diagonal 
--import wandb 
--#from _config import Config 
--#config = Config.config
--
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--#setting random seeds
--SEED = 4444
--
--random.seed(SEED)
--np.random.seed(SEED)
--torch.manual_seed(SEED)
--torch.cuda.manual_seed(SEED)
--torch.backends.cudnn.deterministic = True
--
--
--
--
--parser = argparse.ArgumentParser(description='Barlow Twins Training')
--# parser.add_batch_sizeargument('data', type=Path, metavar='DIR',
--#                     help='path to dataset')
--
--
--
--# Training parameters: 
--parser.add_argument('--workers', default=20, type=int, metavar='N',
--                    help='number of data loader workers')
--parser.add_argument('--epochs', default=2, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=64, type=int, metavar='N',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate-weights', default=0.2, type=float, metavar='LR',
--                    help='base learning rate for weights')
--parser.add_argument('--learning-rate-biases', default=0.0048, type=float, metavar='LR',
--                 help='base learning rate for biases and batch norm parameters')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--lambd', default=0.0051, type=float, metavar='L',
--                    help='weight on off-diagonal terms')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--
--# Model parameters:
--parser.add_argument('--projector', default='768-768', type=str,
--                    metavar='MLP', help='projector MLP')
--parser.add_argument('--print-freq', default=100, type=int, metavar='N',
--                    help='print frequency')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=3, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--dropout', default=0.0051, type=float, metavar= 'D', 
--                   help='dropout in transformer') 
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-cased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint-dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--parser.add_argument('--load', default=1, type=int,
--                    metavar='LO', help='load weights from translation model')
--
--args = parser.parse_args()
--
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main():
--
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--        wandb.init(config=args)#############################################
--        # wandb.config.update(args)
--        config = wandb.config
--        # print(args.lambd, config.lambd)
--        # wandb.finish()
--        # exibatch_sizet()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=False)
--    t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    mbert = BertModel.from_pretrained(args.tokenizer)
--    model = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=args.lambd).cuda(gpu)
--    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--    optimizer = LARS(parameters, lr=0, weight_decay=args.weight_decay,
--                     weight_decay_filter=True,
--                     lars_adaptation_filter=True)
--    # optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
--
--    # automatically resume from checkpoint if it exists
--    # if (args.checkpoint_dir / 'checkpoint.pth').is_file():
--    #     ckpt = torch.load(args.checkpoint_dir / 'checkpoint.pth',
--    #                       map_location='cpu')
--    #     start_epoch = ckpt['epoch']
--    #     # print("model=",model)
--    #     # print("ckpt=",ckpt['model'])
--    #     model.load_state_dict(ckpt['model'])
--    #     optimizer.load_state_dict(ckpt['optimizer'])
--    # else:
--
--    trans_dataset = Translation_dataset_t(train=True)
--    src_vocab_size = trans_dataset.de_vocab_size 
--    tgt_vocab_size = trans_dataset.en_vocab_size
--    tokenizer = trans_dataset.tokenizer
--    transformer = Transformer(d_model=args.dmodel, 
--                                   nhead=args.nhead, 
--                                   num_encoder_layers=args.nlayers,
--                                   num_decoder_layers=args.nlayers, 
--                                   dim_feedforward=args.dfeedforward, 
--                                   dropout=args.dropout)
--    print(args.batch_size)
--    translation_model = Translator(mbert, 
--            transformer,
--            tgt_vocab_size=tgt_vocab_size,
--            emb_size=args.mbert_out_size)
--    
--    if args.load == 1 : 
--        print('loading translation model')
--        ckpt = torch.load(args.checkpoint_dir / 'translation_checkpoint.pth') #,map_location='cpu')
--        translation_model.load_state_dict(ckpt['model'])
--        model.transformer_enc = translation_model.transformer.encoder
--        model.mbert = translation_model.tok_emb.embedding
--        
--    start_epoch = 0
--
--
--    ################################
--    # dataset = torchvision.datasets.ImageFolder(args.data / 'train', Transform())
--    # sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--    ###############################
--
--    dataset = Translation_dataset()
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    #############################
--    start_time = time.time()
--    scaler = torch.cuda.amp.GradScaler()
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            y1 = sent[0].cuda(gpu, non_blocking=True)
--            y2 = sent[1].cuda(gpu, non_blocking=True)
--            adjust_learning_rate(args, optimizer, loader, step)
--            optimizer.zero_grad()
--            with torch.cuda.amp.autocast(): 
--                _, loss = model.forward(y1, y2)
--                wandb.log({'iter_loss':loss})
--#               print(loss.item())
--                epoch_loss += loss.item()
--            scaler.scale(loss).backward()
--            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
--            scaler.step(optimizer)
--            scaler.update()
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 lr_weights=optimizer.param_groups[0]['lr'],
--                                 lr_biases=optimizer.param_groups[1]['lr'],
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.state_dict(),
--                         optimizer=optimizer.state_dict())
--            torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--            print('barlow model saved in', args.checkpoint_dir)
--            for sent in test_loader: 
--                y1 = sent[0].cuda(gpu, non_blocking=True)
--                y2 = sent[1].cuda(gpu, non_blocking=True)
--                model.eval()
--                c, _ = model(y1, y2)
--                xlabels = tokenizer.convert_ids_to_tokens(y2)
--                ylabels = tokenizer.convert_ids_to_tokens(y1)
--    wandb.finish()
--#    if args.rank == 0:
--#        save final model
--#        torch.save(model.module.state_dict(),
--#                    args.checkpoint_dir / 'translation.pth')
--
--
--def adjust_learning_rate(args, optimizer, loader, step):
--    max_steps = args.epochs * len(loader)
--    warmup_steps = 10 * len(loader)
--    base_lr = args.batch_size / 256
--    if step < warmup_steps:
--        lr = base_lr * step / warmup_steps
--    else:
--        step -= warmup_steps
--        max_steps -= warmup_steps
--        q = 0.5 * (1 + math.cos(math.pi * step / max_steps))
--        end_lr = base_lr * 0.001
--        lr = base_lr * q + end_lr * (1 - q)
--    optimizer.param_groups[0]['lr'] = lr * args.learning_rate_weights
--    optimizer.param_groups[1]['lr'] = lr * args.learning_rate_biases
--
--
--def handle_sigusr1(signum, frame):
--    os.system(f'scontrol requeue {os.getenv("SLURM_JOB_ID")}')
--    exit()
--
--
--def handle_sigterm(signum, frame):
--    pass
--
--
--class LARS(optim.Optimizer):
--    def __init__(self, params, lr, weight_decay=0, momentum=0.9, eta=0.001,
--                 weight_decay_filter=False, lars_adaptation_filter=False):
--        defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum,
--                        eta=eta, weight_decay_filter=weight_decay_filter,
--                        lars_adaptation_filter=lars_adaptation_filter)
--        super().__init__(params, defaults)
--
--
--    def exclude_bias_and_norm(self, p):
--        return p.ndim == 1
--
--    @torch.no_grad()
--    def step(self):
--        for g in self.param_groups:
--            for p in g['params']:
--                dp = p.grad
--
--                if dp is None:
--                    continue
--
--                if not g['weight_decay_filter'] or not self.exclude_bias_and_norm(p):
--                    dp = dp.add(p, alpha=g['weight_decay'])
--
--                if not g['lars_adaptation_filter'] or not self.exclude_bias_and_norm(p):
--                    param_norm = torch.norm(p)
--                    update_norm = torch.norm(dp)
--                    one = torch.ones_like(param_norm)
--                    q = torch.where(param_norm > 0.,
--                                    torch.where(update_norm > 0,
--                                                (g['eta'] * param_norm / update_norm), one), one)
--                    dp = dp.mul(q)
--
--                param_state = self.state[p]
--                if 'mu' not in param_state:
--                    param_state['mu'] = torch.zeros_like(p)
--                mu = param_state['mu']
--                mu.mul_(g['momentum']).add_(dp)
--
--                p.add_(mu, alpha=-g['lr'])
--
--
--if __name__ == '__main__':
--    try:  
--      main()
--    except KeyboardInterrupt:
--      print('Interrupted')
--      wandb.finish()
--      try:
--          sys.exit(0)
--      except SystemExit:
--          os._exit(0)
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml b/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/config.yaml b/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-deleted file mode 100644
-index 147470d..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-+++ /dev/null
-@@ -1,90 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/barlow.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.0051
--epochs:
--  desc: null
--  value: 2
--lambd:
--  desc: null
--  value: 0.0051
--learning_rate_biases:
--  desc: null
--  value: 0.0048
--learning_rate_weights:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 3
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 100
--projector:
--  desc: null
--  value: 768-768
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-cased
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 20
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/output.log b/wandb/run-20220406_171518-s7zesus8/files/output.log
-deleted file mode 100644
-index 847ffbb..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/output.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--
--barlow.py --load 0
--Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Error in sys.excepthook:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 47, in getlines
--    return updatecache(filename, module_globals)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 136, in updatecache
--    with tokenize.open(fullname) as fp:
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/tokenize.py", line 447, in open
--    buffer = _builtin_open(filename, 'rb')
--KeyboardInterrupt
--Original exception was:
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt b/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-deleted file mode 100644
-index 5f93d29..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,21 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-06T11:45:20.215162",
--    "startedAt": "2022-04-06T11:45:18.613420",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_enhancement/barlow.py",
--    "codePath": "barlow.py",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log b/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-deleted file mode 100644
-index 0630656..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-+++ /dev/null
-@@ -1,91 +0,0 @@
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,622 DEBUG   MainThread:16786 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: check_version
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send():179] send: header
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: check_version
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:19,155 DEBUG   SenderThread:16786 [sender.py:send():179] send: run
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 DEBUG   SenderThread:16786 [sender.py:send():179] send: summary
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:20,211 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: run_start
--2022-04-06 17:15:20,214 DEBUG   HandlerThread:16786 [meta.py:__init__():39] meta init
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:__init__():53] meta init done
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:probe():210] probe
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():89] save code
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():110] save code done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():57] save pip
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():71] save pip done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_conda():78] save conda
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,240 DEBUG   HandlerThread:16786 [meta.py:_save_conda():86] save conda done
--2022-04-06 17:15:22,241 DEBUG   HandlerThread:16786 [meta.py:probe():252] probe done
--2022-04-06 17:15:22,255 DEBUG   SenderThread:16786 [sender.py:send():179] send: files
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-06 17:15:22,262 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: stop_status
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug.log b/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-deleted file mode 100644
-index 9769176..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:init():369] calling init triggers
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 20, 'epochs': 2, 'batch_size': 64, 'learning_rate_weights': 0.2, 'learning_rate_biases': 0.0048, 'weight_decay': 1e-06, 'lambd': 0.0051, 'clip': 1, 'projector': '768-768', 'print_freq': 100, 'dmodel': 768, 'nhead': 3, 'dfeedforward': 256, 'nlayers': 3, 'dropout': 0.0051, 'tokenizer': 'bert-base-multilingual-cased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():418] starting backend
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():132] starting backend process...
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb b/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
-deleted file mode 100644
-index cd7ebea..0000000
-Binary files a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb and /dev/null differ
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py b/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-deleted file mode 100644
-index f15df21..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch b/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-deleted file mode 100644
-index 0ddeae0..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-+++ /dev/null
-@@ -1,226 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2158287 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,87 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..ee4c0ff 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..29be718 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..bda663d 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/output.log b/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-deleted file mode 100644
-index 4d74c7d..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt b/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-deleted file mode 100644
-index 9eb0f02..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:28:48.101605",
--    "startedAt": "2022-04-08T09:28:45.736549",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-deleted file mode 100644
-index 5708b15..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.139744758605957, "_runtime": 22, "_timestamp": 1649410147, "_step": 1, "epoch_loss": 7.139744758605957}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-deleted file mode 100644
-index e57e276..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,745 DEBUG   MainThread:63630 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send():179] send: header
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:46,531 DEBUG   SenderThread:63630 [sender.py:send():179] send: run
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:48,099 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():39] meta init
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():53] meta init done
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:probe():210] probe
--2022-04-08 14:58:48,107 DEBUG   HandlerThread:63630 [meta.py:_setup_git():200] setup git
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_save_code():89] save code
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_code():110] save code done
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_patches():127] save patches
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():57] save pip
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_conda():78] save conda
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:probe():252] probe done
--2022-04-08 14:58:49,727 DEBUG   SenderThread:63630 [sender.py:send():179] send: files
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,737 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:50,547 DEBUG   SenderThread:63630 [sender.py:send():179] send: config
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:05,549 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:05,549 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-deleted file mode 100644
-index a6875c4..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'd3rkwo1k', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml', 'start_method': 'thread'}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:45,738 INFO    MainThread:63630 [wandb_init.py:init():418] starting backend
--2022-04-08 14:58:45,743 INFO    MainThread:63630 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb b/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py b/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml b/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/config.yaml b/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-deleted file mode 100644
-index d5b49b7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 36
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/diff.patch b/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-deleted file mode 100644
-index 5bddede..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-+++ /dev/null
-@@ -1,228 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..f7a973d 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,89 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..151b958 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..80b3468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..abf5aa3 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145917-fjhaj183
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/output.log b/wandb/run-20220408_145917-fjhaj183/files/output.log
-deleted file mode 100644
-index ceeeb4b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt b/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-deleted file mode 100644
-index 705a1e7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:18.659644",
--    "startedAt": "2022-04-08T09:29:17.328450",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=36",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-deleted file mode 100644
-index 1749cae..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140841484069824, "_runtime": 16, "_timestamp": 1649410173, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log b/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-deleted file mode 100644
-index 6a2ea0b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,338 DEBUG   MainThread:63880 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send():179] send: header
--2022-04-08 14:59:17,342 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:17,943 DEBUG   SenderThread:63880 [sender.py:send():179] send: run
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:18,657 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():39] meta init
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:probe():210] probe
--2022-04-08 14:59:18,665 DEBUG   HandlerThread:63880 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_save_code():89] save code
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:probe():252] probe done
--2022-04-08 14:59:20,075 DEBUG   SenderThread:63880 [sender.py:send():179] send: files
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,086 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:20,978 DEBUG   SenderThread:63880 [sender.py:send():179] send: config
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: history
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug.log b/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-deleted file mode 100644
-index 5f71fa1..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjhaj183', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjhaj183.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 36, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:17,335 INFO    MainThread:63880 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb b/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py b/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml b/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml b/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-deleted file mode 100644
-index 39ea9ed..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 16
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch b/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-deleted file mode 100644
-index 3de404c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-+++ /dev/null
-@@ -1,230 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..1036f20 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,91 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..33a9122 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..622b540 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c775116 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/output.log b/wandb/run-20220408_145943-fjlzyv53/files/output.log
-deleted file mode 100644
-index 0a584f7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt b/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-deleted file mode 100644
-index 321b5fe..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:44.714511",
--    "startedAt": "2022-04-08T09:29:43.530748",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=16",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-deleted file mode 100644
-index 43fa534..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.180241584777832, "_runtime": 16, "_timestamp": 1649410199, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-deleted file mode 100644
-index 1bb5ef6..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,540 DEBUG   MainThread:64131 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send():179] send: header
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:43,999 DEBUG   SenderThread:64131 [sender.py:send():179] send: run
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:44,712 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():39] meta init
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:probe():210] probe
--2022-04-08 14:59:44,720 DEBUG   HandlerThread:64131 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:44,739 DEBUG   HandlerThread:64131 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:44,740 DEBUG   HandlerThread:64131 [meta.py:_save_code():89] save code
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:probe():252] probe done
--2022-04-08 14:59:46,122 DEBUG   SenderThread:64131 [sender.py:send():179] send: files
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,133 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,710 DEBUG   SenderThread:64131 [sender.py:send():179] send: config
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: history
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-deleted file mode 100644
-index 042323c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjlzyv53', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 1024, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:43,537 INFO    MainThread:64131 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb b/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py b/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml b/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150006-abict4v2/files/config.yaml b/wandb/run-20220408_150006-abict4v2/files/config.yaml
-deleted file mode 100644
-index 55505a9..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 20
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 8
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150006-abict4v2/files/diff.patch b/wandb/run-20220408_150006-abict4v2/files/diff.patch
-deleted file mode 100644
-index cae01c4..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/diff.patch
-+++ /dev/null
-@@ -1,232 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..a79a795 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,93 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..baa82b6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..79d1f8d 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..4572147 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150006-abict4v2
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/output.log b/wandb/run-20220408_150006-abict4v2/files/output.log
-deleted file mode 100644
-index 18438a2..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/output.log
-+++ /dev/null
-@@ -1,14 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:261: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
-diff --git a/wandb/run-20220408_150006-abict4v2/files/requirements.txt b/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json b/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-deleted file mode 100644
-index f46fef8..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:08.569102",
--    "startedAt": "2022-04-08T09:30:06.988517",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=20",
--        "--nhead=8",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json b/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-deleted file mode 100644
-index 4c47552..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.120020389556885, "_runtime": 21, "_timestamp": 1649410227, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log b/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-deleted file mode 100644
-index eb4114e..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-+++ /dev/null
-@@ -1,71 +0,0 @@
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,998 DEBUG   MainThread:64393 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send():179] send: header
--2022-04-08 15:00:07,002 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:07,447 DEBUG   SenderThread:64393 [sender.py:send():179] send: run
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,565 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:08,566 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:08,568 DEBUG   HandlerThread:64393 [meta.py:__init__():39] meta init
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:probe():210] probe
--2022-04-08 15:00:08,574 DEBUG   HandlerThread:64393 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_save_code():89] save code
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:probe():252] probe done
--2022-04-08 15:00:10,005 DEBUG   SenderThread:64393 [sender.py:send():179] send: files
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:11,189 DEBUG   SenderThread:64393 [sender.py:send():179] send: config
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:26,191 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:26,191 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: history
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug.log b/wandb/run-20220408_150006-abict4v2/logs/debug.log
-deleted file mode 100644
-index 2782e5f..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug.log
-+++ /dev/null
-@@ -1,51 +0,0 @@
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'abict4v2', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-abict4v2.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 20, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 8, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:06,990 INFO    MainThread:64393 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:06,995 INFO    MainThread:64393 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb b/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py b/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml b/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml b/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-deleted file mode 100644
-index ea14f0e..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch b/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-deleted file mode 100644
-index 47b804f..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-+++ /dev/null
-@@ -1,234 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2248477 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,95 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..165ed2c 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..f1325dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..1413293 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/output.log b/wandb/run-20220408_150037-ba0yl54z/files/output.log
-deleted file mode 100644
-index 6742216..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt b/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-deleted file mode 100644
-index 5a492ae..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:38.254663",
--    "startedAt": "2022-04-08T09:30:37.394479",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=64",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-deleted file mode 100644
-index 662ac89..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.082856178283691, "_runtime": 16, "_timestamp": 1649410253, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-deleted file mode 100644
-index 0c041a1..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,404 DEBUG   MainThread:64646 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 DEBUG   SenderThread:64646 [sender.py:send():179] send: header
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,410 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:37,410 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:37,611 DEBUG   SenderThread:64646 [sender.py:send():179] send: run
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:38,252 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():39] meta init
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:probe():210] probe
--2022-04-08 15:00:38,260 DEBUG   HandlerThread:64646 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_save_code():89] save code
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:probe():252] probe done
--2022-04-08 15:00:39,665 DEBUG   SenderThread:64646 [sender.py:send():179] send: files
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,676 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:39,676 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:40,430 DEBUG   SenderThread:64646 [sender.py:send():179] send: config
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: history
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-deleted file mode 100644
-index 4346748..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'ba0yl54z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 64, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 512, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:37,401 INFO    MainThread:64646 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb b/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py b/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml b/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml b/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-deleted file mode 100644
-index 546bdaa..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 16
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch b/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-deleted file mode 100644
-index c98ba4e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-+++ /dev/null
-@@ -1,285 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ea51a40 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,97 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f8e98b2 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..9304e2b 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b02872b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/output.log b/wandb/run-20220408_153004-dg43ixc4/files/output.log
-deleted file mode 100644
-index f49019d..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt b/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-deleted file mode 100644
-index 109e1b6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:05.796412",
--    "startedAt": "2022-04-08T10:00:04.837672",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=512",
--        "--epochs=16",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-deleted file mode 100644
-index 09cdda6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140233993530273, "_runtime": 15, "_timestamp": 1649412019, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-deleted file mode 100644
-index 9669aaf..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,848 DEBUG   MainThread:65348 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,851 DEBUG   SenderThread:65348 [sender.py:send():179] send: header
--2022-04-08 15:30:04,851 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:04,852 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,022 DEBUG   SenderThread:65348 [sender.py:send():179] send: run
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:05,794 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():39] meta init
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:probe():210] probe
--2022-04-08 15:30:05,802 DEBUG   HandlerThread:65348 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:05,821 DEBUG   HandlerThread:65348 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:05,822 DEBUG   HandlerThread:65348 [meta.py:_save_code():89] save code
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:probe():252] probe done
--2022-04-08 15:30:07,221 DEBUG   SenderThread:65348 [sender.py:send():179] send: files
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,232 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:07,233 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,677 DEBUG   SenderThread:65348 [sender.py:send():179] send: config
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: history
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-deleted file mode 100644
-index 66c14b1..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'dg43ixc4', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 16, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:04,845 INFO    MainThread:65348 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb b/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py b/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml b/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml b/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-deleted file mode 100644
-index 122f33a..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch b/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-deleted file mode 100644
-index 797f0a1..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-+++ /dev/null
-@@ -1,287 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..356076f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,99 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7b452fc 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..48b2ecd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..93be230 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/output.log b/wandb/run-20220408_153027-fwwd5rya/files/output.log
-deleted file mode 100644
-index e86aeca..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-17:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt b/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-deleted file mode 100644
-index dcac75d..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:27.794832",
--    "startedAt": "2022-04-08T10:00:27.031889",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=256",
--        "--epochs=40",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-deleted file mode 100644
-index e70a2b8..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-+++ /dev/null
-@@ -1,99 +0,0 @@
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,040 DEBUG   MainThread:65601 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,046 DEBUG   SenderThread:65601 [sender.py:send():179] send: header
--2022-04-08 15:30:27,046 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:27,047 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,254 DEBUG   SenderThread:65601 [sender.py:send():179] send: run
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: summary
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:27,792 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():39] meta init
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:probe():210] probe
--2022-04-08 15:30:27,800 DEBUG   HandlerThread:65601 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:27,819 DEBUG   HandlerThread:65601 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:27,820 DEBUG   HandlerThread:65601 [meta.py:_save_code():89] save code
--2022-04-08 15:30:27,828 DEBUG   HandlerThread:65601 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:27,829 DEBUG   HandlerThread:65601 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:probe():252] probe done
--2022-04-08 15:30:29,202 DEBUG   SenderThread:65601 [sender.py:send():179] send: files
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:29,214 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: config
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-deleted file mode 100644
-index 987c5d6..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-+++ /dev/null
-@@ -1,84 +0,0 @@
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fwwd5rya', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 256, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:27,038 INFO    MainThread:65601 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:27,039 INFO    MainThread:65601 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb b/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
-deleted file mode 100644
-index bfb12ff..0000000
-Binary files a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py b/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml b/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml b/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch b/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-deleted file mode 100644
-index bd71761..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-+++ /dev/null
-@@ -1,377 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..d3a775c 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,100 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..74ec524 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..c957937 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..287708f 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/output.log b/wandb/run-20220409_152616-3a3gw94y/files/output.log
-deleted file mode 100644
-index 13e9c3e..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt b/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-deleted file mode 100644
-index 20f0482..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:56:17.429229",
--    "startedAt": "2022-04-09T09:56:16.815816",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-deleted file mode 100644
-index 5602f92..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 16, "_timestamp": 1649498192, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-deleted file mode 100644
-index 2546fd3..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,824 DEBUG   MainThread:3266 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,828 DEBUG   SenderThread:3266 [sender.py:send():179] send: header
--2022-04-09 15:26:16,829 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:16,984 DEBUG   SenderThread:3266 [sender.py:send():179] send: run
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:17,426 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():39] meta init
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():53] meta init done
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:probe():210] probe
--2022-04-09 15:26:17,435 DEBUG   HandlerThread:3266 [meta.py:_setup_git():200] setup git
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_save_code():89] save code
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_code():110] save code done
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_patches():127] save patches
--2022-04-09 15:26:17,564 DEBUG   HandlerThread:3266 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:26:17,565 DEBUG   HandlerThread:3266 [meta.py:_save_pip():57] save pip
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_conda():78] save conda
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:probe():252] probe done
--2022-04-09 15:26:19,491 DEBUG   SenderThread:3266 [sender.py:send():179] send: files
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:26:19,497 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:19,831 DEBUG   SenderThread:3266 [sender.py:send():179] send: config
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: history
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-deleted file mode 100644
-index ebbf034..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():418] starting backend
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb b/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py b/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml b/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml b/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch b/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-deleted file mode 100644
-index c3ed101..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-+++ /dev/null
-@@ -1,379 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ed88fe4 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,102 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..4895794 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..1f9d48c 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..dfe2dcb 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/output.log b/wandb/run-20220409_152708-15jgzcwp/files/output.log
-deleted file mode 100644
-index 9a9a49f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt b/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-deleted file mode 100644
-index abaad7d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:57:09.613679",
--    "startedAt": "2022-04-09T09:57:08.966939",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-deleted file mode 100644
-index 0164a0d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 12, "_timestamp": 1649498241, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-deleted file mode 100644
-index de7918e..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,002 DEBUG   MainThread:3540 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,017 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send():179] send: header
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,109 DEBUG   SenderThread:3540 [sender.py:send():179] send: run
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:09,611 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():39] meta init
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():53] meta init done
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:probe():210] probe
--2022-04-09 15:27:09,619 DEBUG   HandlerThread:3540 [meta.py:_setup_git():200] setup git
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_save_code():89] save code
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_code():110] save code done
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_patches():127] save patches
--2022-04-09 15:27:09,693 DEBUG   HandlerThread:3540 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():57] save pip
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_conda():78] save conda
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,002 DEBUG   HandlerThread:3540 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:27:11,003 DEBUG   HandlerThread:3540 [meta.py:probe():252] probe done
--2022-04-09 15:27:11,004 DEBUG   SenderThread:3540 [sender.py:send():179] send: files
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,362 DEBUG   SenderThread:3540 [sender.py:send():179] send: config
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: history
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-deleted file mode 100644
-index 023162f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:27:08,971 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:08,974 INFO    MainThread:3540 [wandb_init.py:init():418] starting backend
--2022-04-09 15:27:08,994 INFO    MainThread:3540 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:27:08,996 INFO    MainThread:3540 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb b/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py b/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-deleted file mode 100644
-index 596bd8d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch b/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-deleted file mode 100644
-index edba74d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-+++ /dev/null
-@@ -1,457 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..6f7f3e6 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,180 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..596bd8d 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7064436 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..3ee4416 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..425ec98 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/output.log b/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-deleted file mode 100644
-index e872735..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt b/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-deleted file mode 100644
-index 39bdbe7..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:31:16.739157",
--    "startedAt": "2022-04-09T10:31:15.626079",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-deleted file mode 100644
-index 96a4906..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 14, "_timestamp": 1649500289, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-deleted file mode 100644
-index 2dc7db1..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,660 DEBUG   MainThread:6109 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 DEBUG   SenderThread:6109 [sender.py:send():179] send: header
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,673 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:01:15,673 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:15,970 DEBUG   SenderThread:6109 [sender.py:send():179] send: run
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:16,736 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():39] meta init
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():53] meta init done
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:probe():210] probe
--2022-04-09 16:01:16,745 DEBUG   HandlerThread:6109 [meta.py:_setup_git():200] setup git
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_save_code():89] save code
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_code():110] save code done
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_patches():127] save patches
--2022-04-09 16:01:16,811 DEBUG   HandlerThread:6109 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():57] save pip
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_conda():78] save conda
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:probe():252] probe done
--2022-04-09 16:01:18,150 DEBUG   SenderThread:6109 [sender.py:send():179] send: files
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,158 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:01:18,158 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,709 DEBUG   SenderThread:6109 [sender.py:send():179] send: config
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: history
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-deleted file mode 100644
-index 87f5666..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--2022-04-09 16:01:15,633 INFO    MainThread:6109 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():418] starting backend
--2022-04-09 16:01:15,655 INFO    MainThread:6109 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:01:15,656 INFO    MainThread:6109 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb b/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py b/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-deleted file mode 100644
-index feaf1fc..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch b/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-deleted file mode 100644
-index eec0ab3..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-+++ /dev/null
-@@ -1,459 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..8b42533 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,182 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..feaf1fc 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..e712296 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b2fc627 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..337b531 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/output.log b/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-deleted file mode 100644
-index e15e9a4..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-+++ /dev/null
-@@ -1,17 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt b/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-deleted file mode 100644
-index f4efc7b..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:32:47.190940",
--    "startedAt": "2022-04-09T10:32:46.030719",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-deleted file mode 100644
-index 59ceedf..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 18, "_timestamp": 1649500384, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-deleted file mode 100644
-index 4dae842..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,040 DEBUG   MainThread:6410 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send():179] send: header
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:46,151 DEBUG   SenderThread:6410 [sender.py:send():179] send: run
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:47,188 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():39] meta init
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():53] meta init done
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:probe():210] probe
--2022-04-09 16:02:47,197 DEBUG   HandlerThread:6410 [meta.py:_setup_git():200] setup git
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_save_code():89] save code
--2022-04-09 16:02:47,224 DEBUG   HandlerThread:6410 [meta.py:_save_code():110] save code done
--2022-04-09 16:02:47,225 DEBUG   HandlerThread:6410 [meta.py:_save_patches():127] save patches
--2022-04-09 16:02:47,270 DEBUG   HandlerThread:6410 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():57] save pip
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_conda():78] save conda
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:probe():252] probe done
--2022-04-09 16:02:48,639 DEBUG   SenderThread:6410 [sender.py:send():179] send: files
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,649 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:49,267 DEBUG   SenderThread:6410 [sender.py:send():179] send: config
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,268 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:03:04,269 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:03:04,791 DEBUG   SenderThread:6410 [sender.py:send():179] send: history
--2022-04-09 16:03:04,792 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-deleted file mode 100644
-index c4edd31..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():418] starting backend
--2022-04-09 16:02:46,037 INFO    MainThread:6410 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb b/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py b/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-deleted file mode 100644
-index 182fd97..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-+++ /dev/null
-@@ -1,378 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch b/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-deleted file mode 100644
-index 2c51f6a..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-+++ /dev/null
-@@ -1,470 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..507a499 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,192 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..182fd97 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,98 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..2224b92 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..94d02b9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f7361e5 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/output.log b/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-deleted file mode 100644
-index 35bceac..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-+++ /dev/null
-@@ -1,18 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt b/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-deleted file mode 100644
-index 440569b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:34:10.122598",
--    "startedAt": "2022-04-09T10:34:09.149412",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-deleted file mode 100644
-index 52da06b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 27, "_timestamp": 1649500476, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-deleted file mode 100644
-index bf89eff..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,159 DEBUG   MainThread:6703 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send():179] send: header
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:09,250 DEBUG   SenderThread:6703 [sender.py:send():179] send: run
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:10,119 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():39] meta init
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():53] meta init done
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:probe():210] probe
--2022-04-09 16:04:10,130 DEBUG   HandlerThread:6703 [meta.py:_setup_git():200] setup git
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_save_code():89] save code
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_code():110] save code done
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_patches():127] save patches
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_pip():57] save pip
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_conda():78] save conda
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:probe():252] probe done
--2022-04-09 16:04:11,658 DEBUG   SenderThread:6703 [sender.py:send():179] send: files
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,667 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:11,669 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:12,396 DEBUG   SenderThread:6703 [sender.py:send():179] send: config
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:27,397 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:27,397 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: history
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:39,168 DEBUG   SenderThread:6703 [sender.py:send():179] send: stats
--2022-04-09 16:04:44,241 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:44,241 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:59,736 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:59,737 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-deleted file mode 100644
-index 0fbab81..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-+++ /dev/null
-@@ -1,54 +0,0 @@
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():418] starting backend
--2022-04-09 16:04:09,156 INFO    MainThread:6703 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:04:09,157 INFO    MainThread:6703 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb b/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
-deleted file mode 100644
-index 81c67b9..0000000
-Binary files a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py b/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml b/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/config.yaml b/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-deleted file mode 100644
-index 1ebd7db..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/diff.patch b/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-deleted file mode 100644
-index 9c4e2ae..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-+++ /dev/null
-@@ -1,482 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2d0dffc 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,202 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..18dd535 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b8703a2 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7af087b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160908-2097uoqw
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/output.log b/wandb/run-20220409_160908-2097uoqw/files/output.log
-deleted file mode 100644
-index ed7c7b5..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt b/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-deleted file mode 100644
-index 3cf53b0..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:39:09.049034",
--    "startedAt": "2022-04-09T10:39:08.174640",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-deleted file mode 100644
-index 225791e..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5264.9873046875, "_runtime": 162, "_timestamp": 1649500910, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log b/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-deleted file mode 100644
-index 1baf812..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-+++ /dev/null
-@@ -1,1238 +0,0 @@
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,183 DEBUG   MainThread:7244 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 DEBUG   SenderThread:7244 [sender.py:send():179] send: header
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,187 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:09:08,187 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:08,556 DEBUG   SenderThread:7244 [sender.py:send():179] send: run
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:09,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():39] meta init
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():53] meta init done
--2022-04-09 16:09:09,049 DEBUG   HandlerThread:7244 [meta.py:probe():210] probe
--2022-04-09 16:09:09,055 DEBUG   HandlerThread:7244 [meta.py:_setup_git():200] setup git
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_save_code():89] save code
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_code():110] save code done
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_patches():127] save patches
--2022-04-09 16:09:09,148 DEBUG   HandlerThread:7244 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:09:09,149 DEBUG   HandlerThread:7244 [meta.py:_save_pip():57] save pip
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_conda():78] save conda
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:probe():252] probe done
--2022-04-09 16:09:10,559 DEBUG   SenderThread:7244 [sender.py:send():179] send: files
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,033 DEBUG   SenderThread:7244 [sender.py:send():179] send: config
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:26,037 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:26,037 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:37,780 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:41,491 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:41,492 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:08,466 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:10:12,367 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:12,368 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:27,818 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:27,818 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:43,478 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:43,478 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,373 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:05,374 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:08,654 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:14,750 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:14,750 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:32,169 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:32,169 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:39,457 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:48,462 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:48,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:03,967 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:12:03,968 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
--2022-04-09 16:12:05,938 INFO    MainThread:7244 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 16:12:05,939 INFO    MainThread:7244 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:12:06,150 DEBUG   SenderThread:7244 [sender.py:send():179] send: telemetry
--2022-04-09 16:12:06,151 DEBUG   SenderThread:7244 [sender.py:send():179] send: exit
--2022-04-09 16:12:06,151 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():295] send defer
--2022-04-09 16:12:06,153 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:06,155 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,155 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 16:12:06,155 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:06,156 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 16:12:06,158 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,158 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 16:12:06,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:12:06,227 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,227 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 16:12:06,228 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,228 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 16:12:06,229 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,229 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 16:12:06,229 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,229 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 16:12:06,259 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,450 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:06,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:07,230 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 16:12:07,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,231 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,231 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 16:12:07,231 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:07,232 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:12:07,333 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:07,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:07,453 INFO    SenderThread:7244 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt requirements.txt
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:12:07,455 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log output.log
--2022-04-09 16:12:07,456 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:12:07,457 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:12:07,467 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml config.yaml
--2022-04-09 16:12:07,468 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch diff.patch
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 16:12:07,508 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,510 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,510 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 16:12:07,510 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:07,511 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 16:12:07,512 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,512 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 16:12:07,512 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,513 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 16:12:07,612 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,484 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 16:12:08,485 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,486 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,486 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 16:12:08,487 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 16:12:08,487 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41552
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,489 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,489 DEBUG   SenderThread:7244 [sender.py:send():179] send: final
--2022-04-09 16:12:08,490 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send():179] send: footer
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,490 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 16:12:08,591 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,591 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,593 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,695 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,695 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,696 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,798 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,798 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,799 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,848 INFO    Thread-33 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:08,900 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,901 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,902 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,004 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,005 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,006 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,108 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,109 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,110 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,212 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,213 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,214 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,316 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,317 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,318 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,420 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,421 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,422 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,524 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,525 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,526 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,628 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,629 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,630 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,732 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,733 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,734 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,837 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,838 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,840 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,875 INFO    Thread-32 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:09,942 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,942 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,944 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,046 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,047 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,149 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,150 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,151 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,253 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,254 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,255 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,304 INFO    Thread-29 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:12:10,357 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,358 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,359 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,461 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,463 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,772 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,772 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,772 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,874 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,874 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,876 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,978 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,979 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,980 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,082 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,082 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,084 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,186 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,186 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,188 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,290 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,290 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,292 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,314 INFO    Thread-30 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:11,394 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,394 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,396 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,498 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,499 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,500 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,602 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,603 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,604 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,706 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,707 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,708 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,810 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,810 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,812 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,914 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,915 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,916 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,018 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,019 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,020 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,122 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,122 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,124 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,226 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,228 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,330 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,330 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,332 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,434 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,435 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,436 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,538 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,538 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,540 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,642 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,642 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,644 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,746 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,746 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,747 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,850 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,850 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,852 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,954 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,954 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,955 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,057 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,058 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,059 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,161 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,162 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,163 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,265 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,266 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,267 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,369 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,370 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,371 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,473 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,473 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,475 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,577 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,577 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,578 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,680 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,681 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,682 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,784 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,785 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,786 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,888 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,889 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,890 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,992 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,993 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,994 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,096 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,097 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,098 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,200 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,201 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,202 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,304 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,305 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,307 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,409 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,410 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,411 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,513 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,514 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,515 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,617 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,618 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,619 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,721 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,721 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,723 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,826 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,827 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,829 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,931 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,931 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,933 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,034 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,035 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,037 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,138 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,139 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,141 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,244 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,244 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,245 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,348 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,348 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,350 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,453 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,454 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,461 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,773 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,773 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,775 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,877 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,877 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,879 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,981 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,982 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,983 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,085 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,086 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,087 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,189 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,190 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,191 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,293 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,294 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,295 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,397 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,398 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,399 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,501 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,502 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,503 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,605 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,606 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,607 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,709 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,710 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,711 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,813 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,814 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,816 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,918 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,919 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,920 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,022 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,023 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,024 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,126 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,127 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,128 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,230 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,232 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,334 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,335 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,336 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,374 INFO    Thread-31 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:12:17,438 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,438 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,440 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,542 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,543 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,544 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,646 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,647 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,647 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:17,648 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,650 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 16:12:17,653 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 16:12:17,656 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 16:12:17,656 INFO    HandlerThread:7244 [handler.py:finish():638] shutting down handler
--2022-04-09 16:12:18,493 INFO    WriterThread:7244 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:12:18,647 INFO    SenderThread:7244 [sender.py:finish():933] shutting down sender
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:18,661 INFO    MainThread:7244 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 16:12:18,662 INFO    MainThread:7244 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 16:12:18,663 INFO    MainThread:7244 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 16:12:18,709 INFO    MainThread:7244 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug.log b/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-deleted file mode 100644
-index ad8f755..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-+++ /dev/null
-@@ -1,77 +0,0 @@
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug.log
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():418] starting backend
--2022-04-09 16:09:08,180 INFO    MainThread:7244 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
-diff --git a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb b/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
-deleted file mode 100644
-index b5995f1..0000000
-Binary files a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py b/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml b/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/config.yaml b/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/diff.patch b/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-deleted file mode 100644
-index aa6c773..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-+++ /dev/null
-@@ -1,528 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2aaecf9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,248 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..91bb884 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..252e468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c99b343 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_161421-3t82t88x
--\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/output.log b/wandb/run-20220409_161421-3t82t88x/files/output.log
-deleted file mode 100644
-index 3bf650b..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/output.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt b/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-deleted file mode 100644
-index f9df6f1..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:44:23.094487",
--    "startedAt": "2022-04-09T10:44:21.821617",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log b/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-deleted file mode 100644
-index 3f70132..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,831 DEBUG   MainThread:8815 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send():179] send: header
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:21,939 DEBUG   SenderThread:8815 [sender.py:send():179] send: run
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,090 DEBUG   SenderThread:8815 [sender.py:send():179] send: summary
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:23,092 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():39] meta init
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():53] meta init done
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:probe():210] probe
--2022-04-09 16:14:23,100 DEBUG   HandlerThread:8815 [meta.py:_setup_git():200] setup git
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_save_code():89] save code
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_code():110] save code done
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_patches():127] save patches
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_pip():57] save pip
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_conda():78] save conda
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,537 DEBUG   HandlerThread:8815 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:14:24,538 DEBUG   HandlerThread:8815 [meta.py:probe():252] probe done
--2022-04-09 16:14:24,539 DEBUG   SenderThread:8815 [sender.py:send():179] send: files
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,548 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:24,548 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:25,577 DEBUG   SenderThread:8815 [sender.py:send():179] send: config
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:40,579 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:40,579 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:51,743 DEBUG   SenderThread:8815 [sender.py:send():179] send: stats
--2022-04-09 16:14:56,424 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:56,424 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:15:01,820 DEBUG   SenderThread:8815 [sender.py:send():179] send: history
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug.log b/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-deleted file mode 100644
-index 99b6b97..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug.log
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():418] starting backend
--2022-04-09 16:14:21,828 INFO    MainThread:8815 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb b/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
-deleted file mode 100644
-index a4486ce..0000000
-Binary files a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py b/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml b/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/config.yaml b/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/diff.patch b/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-deleted file mode 100644
-index 9eddab1..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-+++ /dev/null
-@@ -1,560 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..353da1f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,249 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f0332eb 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..97853e9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7be71e2 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_162621-m83puhmm
--\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/output.log b/wandb/run-20220409_162621-m83puhmm/files/output.log
-deleted file mode 100644
-index ee1c9e3..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/output.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt b/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-deleted file mode 100644
-index 4ce8f76..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:56:22.902051",
--    "startedAt": "2022-04-09T10:56:21.924771",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log b/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-deleted file mode 100644
-index 7032449..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,934 DEBUG   MainThread:9280 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:26:21,937 DEBUG   SenderThread:9280 [sender.py:send():179] send: header
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:21,938 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,344 DEBUG   SenderThread:9280 [sender.py:send():179] send: run
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,889 DEBUG   SenderThread:9280 [sender.py:send():179] send: summary
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:22,895 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():39] meta init
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():53] meta init done
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:probe():210] probe
--2022-04-09 16:26:22,908 DEBUG   HandlerThread:9280 [meta.py:_setup_git():200] setup git
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_save_code():89] save code
--2022-04-09 16:26:22,972 DEBUG   HandlerThread:9280 [meta.py:_save_code():110] save code done
--2022-04-09 16:26:22,973 DEBUG   HandlerThread:9280 [meta.py:_save_patches():127] save patches
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():57] save pip
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_conda():78] save conda
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:probe():252] probe done
--2022-04-09 16:26:24,440 DEBUG   SenderThread:9280 [sender.py:send():179] send: files
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:24,448 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:24,898 DEBUG   SenderThread:9280 [sender.py:send():179] send: config
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:39,905 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:39,905 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:51,624 DEBUG   SenderThread:9280 [sender.py:send():179] send: stats
--2022-04-09 16:26:55,340 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:55,340 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:27:06,912 DEBUG   SenderThread:9280 [sender.py:send():179] send: history
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug.log b/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-deleted file mode 100644
-index 5053427..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():418] starting backend
--2022-04-09 16:26:21,931 INFO    MainThread:9280 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb b/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
-deleted file mode 100644
-index 978cbe5..0000000
-Binary files a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py b/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-deleted file mode 100644
-index 1988ff1..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 1
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 1
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch b/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-deleted file mode 100644
-index d503875..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-+++ /dev/null
-@@ -1,561 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..b0966e9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,250 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..1486dd6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..071678f 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..be8b91a 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf
--\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/output.log b/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-deleted file mode 100644
-index f4f17d5..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt b/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-deleted file mode 100644
-index 6c00633..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:09:01.944494",
--    "startedAt": "2022-04-09T12:09:01.199712",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-deleted file mode 100644
-index c0804b4..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5045.823547363281, "_runtime": 154, "_timestamp": 1649506295, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-deleted file mode 100644
-index 67f5897..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-+++ /dev/null
-@@ -1,418 +0,0 @@
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,208 DEBUG   MainThread:10760 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send():179] send: header
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,337 DEBUG   SenderThread:10760 [sender.py:send():179] send: run
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:01,942 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():39] meta init
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():53] meta init done
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:probe():210] probe
--2022-04-09 17:39:01,950 DEBUG   HandlerThread:10760 [meta.py:_setup_git():200] setup git
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_save_code():89] save code
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_code():110] save code done
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_patches():127] save patches
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():57] save pip
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_conda():78] save conda
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:probe():252] probe done
--2022-04-09 17:39:03,362 DEBUG   SenderThread:10760 [sender.py:send():179] send: files
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,372 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:03,372 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,822 DEBUG   SenderThread:10760 [sender.py:send():179] send: config
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:18,825 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:18,826 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:30,755 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:39:34,298 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:34,298 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:01,384 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:05,203 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:05,204 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,724 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:20,725 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,136 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:27,137 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:32,273 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:36,248 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:36,249 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:51,681 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:51,682 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:02,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,142 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:07,142 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:22,870 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:22,871 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:33,728 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,321 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:38,322 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: telemetry
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: exit
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():295] send defer
--2022-04-09 17:41:51,004 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,005 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,006 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,006 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 17:41:51,007 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,008 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,008 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 17:41:51,009 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 17:41:51,009 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,010 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 17:41:51,062 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,062 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:51,063 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,063 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 17:41:51,064 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,064 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 17:41:51,064 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 17:41:51,065 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,065 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 17:41:51,109 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,203 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:51,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:51,546 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 17:41:51,546 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,546 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,546 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,546 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 17:41:51,547 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 17:41:51,648 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt requirements.txt
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log output.log
--2022-04-09 17:41:52,208 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 17:41:52,209 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json wandb-summary.json
--2022-04-09 17:41:52,218 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml config.yaml
--2022-04-09 17:41:52,220 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch diff.patch
--2022-04-09 17:41:52,222 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py code/train_translation.py
--2022-04-09 17:41:52,224 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 17:41:52,224 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 17:41:52,225 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 17:41:52,225 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,226 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,226 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 17:41:52,328 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,842 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 17:41:52,842 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,844 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,844 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 17:41:52,845 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,846 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 17:41:52,848 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,848 DEBUG   SenderThread:10760 [sender.py:send():179] send: final
--2022-04-09 17:41:52,849 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 17:41:52,849 DEBUG   SenderThread:10760 [sender.py:send():179] send: footer
--2022-04-09 17:41:52,850 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,850 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 17:41:52,947 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,947 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,948 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,049 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,050 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,051 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 45730
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,153 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,153 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,155 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,256 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,257 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,258 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,360 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,361 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,362 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,464 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,465 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,466 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,502 INFO    Thread-33 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:53,504 INFO    Thread-29 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:41:53,512 INFO    Thread-32 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:53,524 INFO    Thread-31 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:41:53,568 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,568 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,569 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,671 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,672 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,673 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,775 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,776 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,777 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,879 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,879 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,881 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,983 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,983 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,984 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,033 INFO    Thread-30 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:54,086 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,087 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,088 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,190 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,190 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,192 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,294 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,294 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,294 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:54,295 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,297 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 17:41:54,299 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 17:41:54,302 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 17:41:54,302 INFO    HandlerThread:10760 [handler.py:finish():638] shutting down handler
--2022-04-09 17:41:54,849 INFO    WriterThread:10760 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [sender.py:finish():933] shutting down sender
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:55,308 INFO    MainThread:10760 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 17:41:55,309 INFO    MainThread:10760 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 17:41:55,310 INFO    MainThread:10760 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 17:41:55,323 INFO    MainThread:10760 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-deleted file mode 100644
-index 2ea4289..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-+++ /dev/null
-@@ -1,73 +0,0 @@
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():418] starting backend
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb b/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
-deleted file mode 100644
-index c939775..0000000
-Binary files a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py b/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml b/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/config.yaml b/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-deleted file mode 100644
-index 0b2ef04..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 24
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/diff.patch b/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-deleted file mode 100644
-index a6f8b6d..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-+++ /dev/null
-@@ -1,634 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e11eb21 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,302 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..a3e7597 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..453b7bc 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b2d6ded 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_175151-z44hpswp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/output.log b/wandb/run-20220409_175151-z44hpswp/files/output.log
-deleted file mode 100644
-index 2224687..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/output.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--translation model saved in checkpoint
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt b/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-deleted file mode 100644
-index e3bc5e0..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:21:52.829321",
--    "startedAt": "2022-04-09T12:21:51.786614",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=24",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-deleted file mode 100644
-index 4d8b4c3..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 107.22583770751953, "_runtime": 695, "_timestamp": 1649507606, "_step": 28, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log b/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-deleted file mode 100644
-index 552d2f2..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,620 +0,0 @@
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,796 DEBUG   MainThread:14720 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send():179] send: header
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,171 DEBUG   SenderThread:14720 [sender.py:send():179] send: run
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,825 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:52,827 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():39] meta init
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():53] meta init done
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:probe():210] probe
--2022-04-09 17:51:52,837 DEBUG   HandlerThread:14720 [meta.py:_setup_git():200] setup git
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_save_code():89] save code
--2022-04-09 17:51:52,876 DEBUG   HandlerThread:14720 [meta.py:_save_code():110] save code done
--2022-04-09 17:51:52,877 DEBUG   HandlerThread:14720 [meta.py:_save_patches():127] save patches
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():57] save pip
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_conda():78] save conda
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:probe():252] probe done
--2022-04-09 17:51:54,261 DEBUG   SenderThread:14720 [sender.py:send():179] send: files
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,272 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:51:54,272 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: config
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:09,721 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:09,721 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:21,569 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:25,148 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:25,149 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:52,213 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,140 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:56,140 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:11,596 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:11,597 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:23,054 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:27,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:27,074 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:42,499 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:42,500 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:53,596 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:57,929 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:57,929 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:59,413 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:59,414 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:13,359 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:13,359 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,344 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:54:20,345 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:24,527 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:28,793 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:28,793 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:44,227 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:44,227 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:55,062 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:59,653 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:59,653 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:11,338 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:11,339 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:15,098 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:15,099 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:25,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:30,519 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:30,519 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:45,955 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:45,956 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:56,468 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:01,589 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:17,078 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:17,078 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:27,343 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:56:32,522 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:32,522 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:47,961 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:47,961 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:57,925 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:03,390 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:03,390 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:18,853 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:18,853 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:28,552 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:34,280 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:34,280 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:49,734 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:49,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:59,325 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,341 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:05,342 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:20,790 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:20,790 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:29,955 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:36,214 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:36,214 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:51,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:51,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:00,845 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:07,147 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:07,147 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:22,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:22,588 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:31,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:38,008 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:38,008 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:53,449 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:53,450 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:02,140 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:08,884 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:08,884 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:13,617 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:13,618 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:24,366 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:24,367 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:32,786 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:39,806 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:39,806 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,224 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:55,225 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,715 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:00,716 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:03,610 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:10,649 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:10,649 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:26,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:26,073 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:34,217 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:41,491 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:41,492 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,993 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:43,994 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:56,918 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:56,918 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:04,763 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:12,340 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:12,340 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:35,408 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:43,201 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:43,201 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:44,434 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:44,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:58,647 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:58,647 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:06,291 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:14,117 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:14,117 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,051 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:26,052 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:29,557 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:29,559 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:36,939 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:03:42,324 INFO    MainThread:14720 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:03:43,079 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:43,080 DEBUG   SenderThread:14720 [sender.py:send():179] send: telemetry
--2022-04-09 18:03:43,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:43,580 DEBUG   SenderThread:14720 [sender.py:send():179] send: exit
--2022-04-09 18:03:43,580 INFO    SenderThread:14720 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:03:43,581 INFO    SenderThread:14720 [sender.py:send_exit():295] send defer
--2022-04-09 18:03:43,581 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:43,582 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,583 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:03:43,583 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:03:43,584 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 48639
--}
--
--2022-04-09 18:03:43,585 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,586 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:03:43,657 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,657 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:43,658 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,658 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:03:43,660 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,660 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:03:43,686 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:44,248 INFO    SenderThread:14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt requirements.txt
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log output.log
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml config.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch diff.patch
--2022-04-09 18:03:44,251 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:03:44,253 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:03:44,253 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,254 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,258 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:03:44,260 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,260 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:03:44,261 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,261 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:03:44,261 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,261 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:03:44,361 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,907 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:03:44,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,908 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,908 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:03:44,909 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,909 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:03:44,910 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,910 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: final
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: footer
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,911 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:03:45,010 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,011 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,012 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,115 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,116 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,117 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,219 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,219 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,221 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,323 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,323 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,325 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,427 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,427 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,428 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,466 INFO    Thread-54 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 18:03:45,472 INFO    Thread-52 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 18:03:45,476 INFO    Thread-53 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:45,530 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,531 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,532 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,636 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,738 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,739 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,740 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,842 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,842 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,844 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,946 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,946 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,948 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,050 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,051 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,053 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,155 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,156 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,157 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,184 INFO    Thread-56 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:46,188 INFO    Thread-55 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:46,259 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,259 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,261 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,363 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,364 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,365 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,468 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,469 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,469 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:46,470 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,472 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:03:46,474 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:03:46,477 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:03:46,478 INFO    HandlerThread:14720 [handler.py:finish():638] shutting down handler
--2022-04-09 18:03:46,911 INFO    WriterThread:14720 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 18:03:47,469 INFO    SenderThread:14720 [sender.py:finish():933] shutting down sender
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:47,483 INFO    MainThread:14720 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:03:47,484 INFO    MainThread:14720 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:03:47,485 INFO    MainThread:14720 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:03:47,525 INFO    MainThread:14720 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug.log b/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-deleted file mode 100644
-index bb769fe..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-+++ /dev/null
-@@ -1,140 +0,0 @@
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'z44hpswp', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-z44hpswp.yaml', 'start_method': 'thread'}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug.log
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 24, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():418] starting backend
--2022-04-09 17:51:51,793 INFO    MainThread:14720 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
-diff --git a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb b/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
-deleted file mode 100644
-index 55f1aff..0000000
-Binary files a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py b/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml b/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml b/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-deleted file mode 100644
-index 194d831..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch b/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-deleted file mode 100644
-index 979dcc5..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-+++ /dev/null
-@@ -1,645 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..42fbde8 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,313 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..371ace5 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..a6d9884 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..705068b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z
--\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/output.log b/wandb/run-20220409_180353-vjrenr4z/files/output.log
-deleted file mode 100644
-index a2bf91c..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/output.log
-+++ /dev/null
-@@ -1,102 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--translation model saved in checkpoint
--{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--translation model saved in checkpoint
--{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--translation model saved in checkpoint
--{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--translation model saved in checkpoint
--{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--translation model saved in checkpoint
--{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--translation model saved in checkpoint
--{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--translation model saved in checkpoint
--{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--translation model saved in checkpoint
--{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--translation model saved in checkpoint
--{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--translation model saved in checkpoint
--{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--translation model saved in checkpoint
--{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--translation model saved in checkpoint
--{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--translation model saved in checkpoint
--{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--translation model saved in checkpoint
--{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--translation model saved in checkpoint
--{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--translation model saved in checkpoint
--{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--translation model saved in checkpoint
--{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--translation model saved in checkpoint
--{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--translation model saved in checkpoint
--{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--translation model saved in checkpoint
--{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--translation model saved in checkpoint
--{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--translation model saved in checkpoint
--{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--translation model saved in checkpoint
--{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--translation model saved in checkpoint
--{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--translation model saved in checkpoint
--{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--translation model saved in checkpoint
--{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--translation model saved in checkpoint
--{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--translation model saved in checkpoint
--{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--translation model saved in checkpoint
--{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--translation model saved in checkpoint
--{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--translation model saved in checkpoint
--{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--translation model saved in checkpoint
--{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--translation model saved in checkpoint
--{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--translation model saved in checkpoint
--{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--translation model saved in checkpoint
--{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt b/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-deleted file mode 100644
-index 3e24107..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:33:55.138080",
--    "startedAt": "2022-04-09T12:33:53.912960",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=40",
--        "--nhead=4",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-deleted file mode 100644
-index dbd5bb9..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 571.8498382568359, "_runtime": 1394, "_timestamp": 1649509027, "_step": 47, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-deleted file mode 100644
-index 6ac5722..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,809 +0,0 @@
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,947 DEBUG   MainThread:18842 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 DEBUG   SenderThread:18842 [sender.py:send():179] send: header
--2022-04-09 18:03:53,957 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:53,958 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:54,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: run
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:55,130 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():39] meta init
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():53] meta init done
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:probe():210] probe
--2022-04-09 18:03:55,146 DEBUG   HandlerThread:18842 [meta.py:_setup_git():200] setup git
--2022-04-09 18:03:55,213 DEBUG   HandlerThread:18842 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:03:55,214 DEBUG   HandlerThread:18842 [meta.py:_save_code():89] save code
--2022-04-09 18:03:55,241 DEBUG   HandlerThread:18842 [meta.py:_save_code():110] save code done
--2022-04-09 18:03:55,242 DEBUG   HandlerThread:18842 [meta.py:_save_patches():127] save patches
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():57] save pip
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_conda():78] save conda
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,710 DEBUG   HandlerThread:18842 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:03:56,711 DEBUG   HandlerThread:18842 [meta.py:probe():252] probe done
--2022-04-09 18:03:56,713 DEBUG   SenderThread:18842 [sender.py:send():179] send: files
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,723 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:56,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: config
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:12,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:12,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:23,959 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:27,637 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:27,637 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:43,070 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:43,071 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:54,578 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:58,609 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:58,609 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,096 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:14,096 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:25,318 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:05:29,536 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:29,536 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,041 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:45,042 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:55,878 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:00,385 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:00,385 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,115 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:12,116 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:15,812 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:15,812 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:26,509 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:31,252 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:31,252 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:46,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:46,699 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:57,088 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:02,128 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:02,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:17,560 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:17,560 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:27,788 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:33,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:33,039 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:58,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:03,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:03,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:10,495 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:10,496 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,773 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:16,774 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:19,358 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:19,358 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:29,127 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:34,827 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:34,827 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:50,258 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:50,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:59,791 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:05,625 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:05,625 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:21,079 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:21,079 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:30,544 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:36,425 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:36,426 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,629 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:37,630 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:51,758 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:51,758 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:01,192 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:07,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:22,576 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:22,576 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,752 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:37,928 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:37,928 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:02,406 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:08,610 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:08,610 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:23,966 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:23,966 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:33,001 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:39,600 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:39,600 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:54,944 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:54,944 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:03,627 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:10,280 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:10,280 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:25,635 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:25,635 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:34,297 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:40,989 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:40,989 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:56,322 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:56,323 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:05,226 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:11,687 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:11,687 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:27,035 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:27,035 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:35,749 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:42,474 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:42,475 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:57,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:06,507 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:13,240 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:13,240 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,985 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:26,986 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:28,667 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:28,668 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:37,148 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:44,310 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:44,310 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:59,666 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:59,666 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:07,695 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:14,998 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:14,998 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:30,334 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:30,334 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:38,429 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:45,673 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:45,673 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:01,020 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:01,020 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:09,031 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:16,349 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:16,349 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:39,689 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:47,261 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:47,261 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:02,605 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:02,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:10,351 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:17,935 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:17,935 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:33,308 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:33,308 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,998 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:44,097 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:44,098 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:48,657 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:48,817 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:11,869 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:20,065 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:20,065 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,258 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:50,780 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:50,780 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:06,176 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:06,176 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:12,884 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:21,533 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:21,533 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:43,542 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:52,222 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:52,222 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:07,575 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:07,575 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:14,395 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:22,919 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:22,920 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:38,284 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:38,284 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:44,947 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:53,719 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:53,719 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:09,154 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:09,154 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:15,554 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:24,513 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:24,513 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,048 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:32,049 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:39,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:39,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:46,176 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:55,292 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:55,292 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:10,678 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:10,679 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:16,761 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:26,337 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:26,337 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:41,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:41,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:43,842 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:43,843 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:47,574 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:57,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:57,038 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:12,473 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:12,473 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:18,151 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:27,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:27,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:43,266 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:43,266 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:48,907 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:58,729 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:58,729 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,447 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:03,448 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:14,167 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:14,167 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:19,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:29,519 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:29,520 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:44,877 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:44,877 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:50,128 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:00,259 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:00,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:20,792 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:30,948 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:30,948 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,976 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:38,977 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:46,374 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:46,374 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:51,548 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:01,722 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:01,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:03,261 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:03,262 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:17,072 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:17,072 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:22,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:26:32,410 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:32,411 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:47,810 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:47,810 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:52,753 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,241 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:03,241 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:18,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:18,700 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:23,342 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:34,106 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:34,107 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
--2022-04-09 18:27:39,696 INFO    MainThread:18842 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:27:39,697 INFO    MainThread:18842 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:27:40,003 DEBUG   SenderThread:18842 [sender.py:send():179] send: telemetry
--2022-04-09 18:27:40,004 DEBUG   SenderThread:18842 [sender.py:send():179] send: exit
--2022-04-09 18:27:40,005 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,005 INFO    SenderThread:18842 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:27:40,006 INFO    SenderThread:18842 [sender.py:send_exit():295] send defer
--2022-04-09 18:27:40,006 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,008 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,008 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:27:40,008 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,010 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:27:40,011 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,011 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:40,067 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,067 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:27:40,069 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,069 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:27:40,110 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:40,461 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:27:40,462 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,463 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,464 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:27:40,464 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,465 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,465 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:27:40,466 INFO    SenderThread:18842 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:27:40,566 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:41,202 INFO    SenderThread:18842 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:27:41,205 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt requirements.txt
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log output.log
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:27:41,207 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml config.yaml
--2022-04-09 18:27:41,211 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch diff.patch
--2022-04-09 18:27:41,220 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:27:41,223 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:27:41,224 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,225 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,225 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:27:41,225 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,226 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,226 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:27:41,230 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:41,231 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:27:41,232 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,232 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:27:41,232 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,232 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:27:41,332 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,915 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:27:41,915 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,917 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,917 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:27:41,918 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,919 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:27:41,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,921 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:27:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: final
--2022-04-09 18:27:41,922 DEBUG   SenderThread:18842 [sender.py:send():179] send: footer
--2022-04-09 18:27:41,923 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,923 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:27:42,024 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,024 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,025 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,127 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,129 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,231 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,231 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,233 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,335 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,335 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,336 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,438 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,439 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,440 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,542 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,542 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,544 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,592 INFO    Thread-73 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:27:42,594 INFO    Thread-71 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:27:42,599 INFO    Thread-75 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:42,601 INFO    Thread-72 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:42,602 INFO    Thread-74 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:42,645 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,645 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,646 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,747 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,748 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,749 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,851 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,851 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,852 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:42,853 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,855 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:27:42,857 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:27:42,860 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:27:42,861 INFO    HandlerThread:18842 [handler.py:finish():638] shutting down handler
--2022-04-09 18:27:42,922 INFO    WriterThread:18842 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:27:43,852 INFO    SenderThread:18842 [sender.py:finish():933] shutting down sender
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:27:43,868 INFO    MainThread:18842 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:27:43,884 INFO    MainThread:18842 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-deleted file mode 100644
-index 55b000f..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-+++ /dev/null
-@@ -1,230 +0,0 @@
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'vjrenr4z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml', 'start_method': 'thread'}
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:53,921 INFO    MainThread:18842 [wandb_init.py:init():418] starting backend
--2022-04-09 18:03:53,941 INFO    MainThread:18842 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:03:53,943 INFO    MainThread:18842 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
-diff --git a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb b/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
-deleted file mode 100644
-index 2a205f7..0000000
-Binary files a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py b/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml b/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_182749-paufev36/files/config.yaml b/wandb/run-20220409_182749-paufev36/files/config.yaml
-deleted file mode 100644
-index c4a0d20..0000000
---- a/wandb/run-20220409_182749-paufev36/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_182749-paufev36/files/diff.patch b/wandb/run-20220409_182749-paufev36/files/diff.patch
-deleted file mode 100644
-index 17f6c34..0000000
---- a/wandb/run-20220409_182749-paufev36/files/diff.patch
-+++ /dev/null
-@@ -1,694 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e8bd4e3 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,362 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--+{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--+{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--+{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--+{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--+{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--+{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--+{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--+{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--+{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--+{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--+{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--+{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--+{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--+{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--+{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--+{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--+{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--+{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--+{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--+{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--+{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--+{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--+{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--+{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--+{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--+{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--+{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--+{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--+{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--+{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--+{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--+{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--+{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--+{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--+{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--+{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--+{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--+{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--+{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--+{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--+{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--+{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--+{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--+{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--+{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--+{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..6163657 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..7d0f5dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f11d588 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_182749-paufev36
--\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/output.log b/wandb/run-20220409_182749-paufev36/files/output.log
-deleted file mode 100644
-index 8a30e30..0000000
---- a/wandb/run-20220409_182749-paufev36/files/output.log
-+++ /dev/null
-@@ -1,55 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.115720272064209, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 202.97476196289062, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 151.204345703125, "time": 62}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Exception in thread Thread-16:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220409_182749-paufev36/files/requirements.txt b/wandb/run-20220409_182749-paufev36/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_182749-paufev36/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json b/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-deleted file mode 100644
-index ee6c1fa..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:57:50.039943",
--    "startedAt": "2022-04-09T12:57:49.399103",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json b/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-deleted file mode 100644
-index 6be8521..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 287.689208984375, "_runtime": 137, "_timestamp": 1649509206, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log b/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-deleted file mode 100644
-index ade12de..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-+++ /dev/null
-@@ -1,141 +0,0 @@
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,431 DEBUG   MainThread:25755 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send():179] send: header
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,435 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:49,589 DEBUG   SenderThread:25755 [sender.py:send():179] send: run
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:50,037 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():39] meta init
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():53] meta init done
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:probe():210] probe
--2022-04-09 18:27:50,045 DEBUG   HandlerThread:25755 [meta.py:_setup_git():200] setup git
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_save_code():89] save code
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_code():110] save code done
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_patches():127] save patches
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_pip():57] save pip
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_conda():78] save conda
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:probe():252] probe done
--2022-04-09 18:27:51,519 DEBUG   SenderThread:25755 [sender.py:send():179] send: files
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,530 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:51,530 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:51,872 DEBUG   SenderThread:25755 [sender.py:send():179] send: config
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:06,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:18,996 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,208 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:22,208 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:49,672 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:53,002 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:53,002 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,936 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:00,937 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:08,453 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:08,454 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:20,345 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:23,787 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:23,787 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:39,186 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:39,186 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:51,270 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:54,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:54,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:10,343 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:30:10,343 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug.log b/wandb/run-20220409_182749-paufev36/logs/debug.log
-deleted file mode 100644
-index 7b0f79c..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug.log
-+++ /dev/null
-@@ -1,92 +0,0 @@
--2022-04-09 18:27:49,403 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'paufev36', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-paufev36.yaml', 'start_method': 'thread'}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():418] starting backend
--2022-04-09 18:27:49,427 INFO    MainThread:25755 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:27:49,429 INFO    MainThread:25755 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb b/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
-deleted file mode 100644
-index 70babdb..0000000
-Binary files a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb and /dev/null differ
-diff --git a/wandb/sweep-1t9pc38r/config-paufev36.yaml b/wandb/sweep-1t9pc38r/config-paufev36.yaml
-deleted file mode 100644
-index da3e8b2..0000000
---- a/wandb/sweep-1t9pc38r/config-paufev36.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml b/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-deleted file mode 100644
-index d68afea..0000000
---- a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml b/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-deleted file mode 100644
-index cc3235e..0000000
---- a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml b/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-deleted file mode 100644
-index 24fc0f6..0000000
---- a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml b/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-deleted file mode 100644
-index eeb3936..0000000
---- a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml b/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-deleted file mode 100644
-index f88591e..0000000
---- a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-abict4v2.yaml b/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-deleted file mode 100644
-index 1b97c5e..0000000
---- a/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 20
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml b/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-deleted file mode 100644
-index 426c8ac..0000000
---- a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml b/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-deleted file mode 100644
-index caf5f78..0000000
---- a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml b/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-deleted file mode 100644
-index 6b7d3c1..0000000
---- a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml b/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-deleted file mode 100644
-index 8f11b7e..0000000
---- a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml b/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-deleted file mode 100644
-index d3a2560..0000000
---- a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml b/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-deleted file mode 100644
-index 403014d..0000000
---- a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 512
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml b/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-deleted file mode 100644
-index d1bf3d8..0000000
---- a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 40
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml b/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-deleted file mode 100644
-index 258ae0c..0000000
---- a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml b/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-deleted file mode 100644
-index dbe827a..0000000
---- a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml b/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-deleted file mode 100644
-index 3aeb285..0000000
---- a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml b/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-deleted file mode 100644
-index ccb6734..0000000
---- a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-gjih072d.yaml b/wandb/sweep-yoroy32u/config-gjih072d.yaml
-deleted file mode 100644
-index 73e8e4c..0000000
---- a/wandb/sweep-yoroy32u/config-gjih072d.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml b/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-deleted file mode 100644
-index 9d822c0..0000000
---- a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml b/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-deleted file mode 100644
-index f0bd5df..0000000
---- a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-uh7twoim.yaml b/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-deleted file mode 100644
-index 508d9e2..0000000
---- a/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml b/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-deleted file mode 100644
-index 83311a7..0000000
---- a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml b/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-deleted file mode 100644
-index 4f6dc35..0000000
---- a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--lambd:
--  value: 0.4
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-padai7jf.yaml b/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-deleted file mode 100644
-index 9b19315..0000000
---- a/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--lambd:
--  value: 0.55
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml b/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-deleted file mode 100644
-index 8a8a9b2..0000000
---- a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 256
--epochs:
--  value: 24
--lambd:
--  value: 0.2
--nhead:
--  value: 2
--nlayers:
--  value: 4
diff --git a/wandb/run-20220415_203417-2injabwk/files/output.log b/wandb/run-20220415_203417-2injabwk/files/output.log
deleted file mode 100644
index 451faa2..0000000
--- a/wandb/run-20220415_203417-2injabwk/files/output.log
+++ /dev/null
@@ -1,65 +0,0 @@
-
-train_translation.py
-Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
-Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
-- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-Exception in thread Thread-3:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
-    self.run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
-    self._target(*self._args, **self._kwargs)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
-    msg = self._response_queue.get(timeout=1)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
-    res = self._recv_bytes()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
-    buf = self._recv_bytes(maxlength)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
-    buf = self._recv(4)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
-    raise EOFError
-EOFError
-Thread HandlerThread:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
-    self._run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
-    record = self._input_record_q.get(timeout=1)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
-    res = self._recv_bytes()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
-    buf = self._recv_bytes(maxlength)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
-    buf = self._recv(4)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
-    raise EOFError
-EOFError
-[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
-Exception in thread Thread-15:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
-    self.run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
-    self._target(*self._args, **self._kwargs)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
-    status_response = self._interface.communicate_stop_status()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
-    resp = self._communicate(req, timeout=timeout, local=True)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
-    return self._communicate_async(rec, local=local).get(timeout=timeout)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
-    raise Exception("The wandb backend process has shutdown")
-Exception: The wandb backend process has shutdown
-Traceback (most recent call last):
-  File "<string>", line 1, in <module>
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
-    exitcode = _main(fd)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
-    return self._bootstrap()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
-    threading._shutdown()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
-    lock.acquire()
-KeyboardInterrupt
\ No newline at end of file
diff --git a/wandb/run-20220415_203417-2injabwk/files/requirements.txt b/wandb/run-20220415_203417-2injabwk/files/requirements.txt
deleted file mode 100644
index 5ddce70..0000000
--- a/wandb/run-20220415_203417-2injabwk/files/requirements.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-aiohttp==3.8.1
-aiosignal==1.2.0
-antlr4-python3-runtime==4.8
-async-timeout==4.0.2
-asynctest==0.13.0
-attrs==21.4.0
-backcall==0.2.0
-bitarray==2.4.1
-blessings==1.7
-brotlipy==0.7.0
-certifi==2021.10.8
-cffi==1.15.0
-charset-normalizer==2.0.12
-click==8.0.4
-colorama==0.4.4
-configparser==5.2.0
-cryptography==36.0.0
-cython==0.29.28
-datasets==1.16.1
-debugpy==1.6.0
-decorator==5.1.1
-dill==0.3.4
-docker-pycreds==0.4.0
-entrypoints==0.4
-fairseq==1.0.0a0
-fastbpe==0.1.0
-filelock==3.6.0
-frozenlist==1.3.0
-fsspec==2022.2.0
-gitdb==4.0.9
-gitpython==3.1.27
-gpustat==0.6.0
-huggingface-hub==0.4.0
-hydra-core==1.0.7
-idna==3.3
-importlib-metadata==4.11.3
-importlib-resources==5.6.0
-ipykernel==6.12.1
-ipython==7.32.0
-jedi==0.18.1
-joblib==1.1.0
-jupyter-client==7.2.2
-jupyter-core==4.9.2
-matplotlib-inline==0.1.3
-mkl-fft==1.3.1
-mkl-random==1.2.2
-mkl-service==2.4.0
-mock==4.0.3
-multidict==6.0.2
-multiprocess==0.70.12.2
-nest-asyncio==1.5.5
-numpy==1.21.5
-nvidia-ml-py3==7.352.0
-omegaconf==2.0.6
-packaging==21.3
-pandas==1.3.5
-parso==0.8.3
-pathtools==0.1.2
-pexpect==4.8.0
-pickleshare==0.7.5
-pillow==9.0.1
-pip==21.2.2
-portalocker==2.4.0
-promise==2.3
-prompt-toolkit==3.0.29
-protobuf==3.19.4
-psutil==5.9.0
-ptyprocess==0.7.0
-pyarrow==7.0.0
-pycparser==2.21
-pygments==2.11.2
-pyopenssl==22.0.0
-pyparsing==3.0.7
-pysocks==1.7.1
-python-dateutil==2.8.2
-pytz==2022.1
-pyyaml==6.0
-pyzmq==22.3.0
-regex==2022.3.15
-requests==2.27.1
-sacrebleu==2.0.0
-sacremoses==0.0.49
-sentry-sdk==1.5.8
-setuptools==58.0.4
-shortuuid==1.0.8
-six==1.16.0
-smmap==5.0.0
-subprocess32==3.5.4
-subword-nmt==0.3.8
-tabulate==0.8.9
-tokenizers==0.10.3
-torch==1.11.0
-torchaudio==0.11.0
-torchtext==0.12.0
-torchvision==0.12.0
-tornado==6.1
-tqdm==4.63.1
-traitlets==5.1.1
-transformers==4.14.1
-typing-extensions==4.1.1
-urllib3==1.26.9
-wandb==0.10.31
-wcwidth==0.2.5
-wheel==0.37.1
-xxhash==3.0.0
-yarl==1.7.2
-zipp==3.7.0
\ No newline at end of file
diff --git a/wandb/run-20220415_203417-2injabwk/files/wandb-metadata.json b/wandb/run-20220415_203417-2injabwk/files/wandb-metadata.json
deleted file mode 100644
index 35794ce..0000000
--- a/wandb/run-20220415_203417-2injabwk/files/wandb-metadata.json
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
-    "python": "3.7.11",
-    "heartbeatAt": "2022-04-15T15:04:19.477918",
-    "startedAt": "2022-04-15T15:04:17.866522",
-    "docker": null,
-    "gpu": "NVIDIA GeForce GTX 1080 Ti",
-    "gpu_count": 2,
-    "cpu_count": 8,
-    "cuda": null,
-    "args": [],
-    "state": "running",
-    "program": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement/train_translation.py",
-    "codePath": "train_translation.py",
-    "git": {
-        "remote": "https://github.com/IvLabs/context_enhancement.git",
-        "commit": "3f7c03274d50f816db3079adcb4d4125620373b6"
-    },
-    "email": "aneeshashetye@gmail.com",
-    "root": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement",
-    "host": "hubble-02",
-    "username": "ivlabs",
-    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
-}
diff --git a/wandb/run-20220415_203417-2injabwk/files/wandb-summary.json b/wandb/run-20220415_203417-2injabwk/files/wandb-summary.json
deleted file mode 100644
index 9e26dfe..0000000
--- a/wandb/run-20220415_203417-2injabwk/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{}
\ No newline at end of file
diff --git a/wandb/run-20220415_203417-2injabwk/logs/debug-internal.log b/wandb/run-20220415_203417-2injabwk/logs/debug-internal.log
deleted file mode 100644
index 4eaab20..0000000
--- a/wandb/run-20220415_203417-2injabwk/logs/debug-internal.log
+++ /dev/null
@@ -1,100 +0,0 @@
-2022-04-15 20:34:17,894 INFO    wandb_internal:6840 [internal.py:wandb_internal():91] W&B internal server running at pid: 6840, started at: 2022-04-15 20:34:17.893635
-2022-04-15 20:34:17,917 INFO    MainThread:6840 [wandb_init.py:init():423] backend started and connected
-2022-04-15 20:34:17,917 DEBUG   MainThread:6840 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
-2022-04-15 20:34:17,919 INFO    MainThread:6840 [wandb_init.py:init():465] updated telemetry
-2022-04-15 20:34:17,937 INFO    MainThread:6840 [wandb_init.py:init():484] communicating current version
-2022-04-15 20:34:17,938 DEBUG   SenderThread:6840 [sender.py:send():179] send: header
-2022-04-15 20:34:17,938 INFO    WriterThread:6840 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/run-2injabwk.wandb
-2022-04-15 20:34:17,940 DEBUG   HandlerThread:6840 [handler.py:handle_request():124] handle_request: check_version
-2022-04-15 20:34:17,940 DEBUG   SenderThread:6840 [sender.py:send_request():193] send_request: check_version
-2022-04-15 20:34:18,241 INFO    MainThread:6840 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-15 20:34:18,242 INFO    MainThread:6840 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-15 20:34:18,244 DEBUG   SenderThread:6840 [sender.py:send():179] send: run
-2022-04-15 20:34:19,461 INFO    MainThread:6840 [wandb_init.py:init():522] starting run threads in backend
-2022-04-15 20:34:19,462 DEBUG   HandlerThread:6840 [handler.py:handle_request():124] handle_request: run_start
-2022-04-15 20:34:19,477 DEBUG   HandlerThread:6840 [meta.py:__init__():39] meta init
-2022-04-15 20:34:19,477 DEBUG   HandlerThread:6840 [meta.py:__init__():53] meta init done
-2022-04-15 20:34:19,477 DEBUG   HandlerThread:6840 [meta.py:probe():210] probe
-2022-04-15 20:34:19,503 DEBUG   HandlerThread:6840 [meta.py:_setup_git():200] setup git
-2022-04-15 20:34:19,521 INFO    SenderThread:6840 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files
-2022-04-15 20:34:19,522 INFO    SenderThread:6840 [sender.py:_start_run_threads():707] run started: 2injabwk with start time 1650035057
-2022-04-15 20:34:19,522 DEBUG   SenderThread:6840 [sender.py:send():179] send: summary
-2022-04-15 20:34:19,523 INFO    SenderThread:6840 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-15 20:34:19,589 DEBUG   HandlerThread:6840 [meta.py:_setup_git():207] setup git done
-2022-04-15 20:34:19,590 DEBUG   HandlerThread:6840 [meta.py:_save_code():89] save code
-2022-04-15 20:34:19,635 DEBUG   HandlerThread:6840 [meta.py:_save_code():110] save code done
-2022-04-15 20:34:19,635 DEBUG   HandlerThread:6840 [meta.py:_save_patches():127] save patches
-2022-04-15 20:34:19,863 DEBUG   HandlerThread:6840 [meta.py:_save_patches():169] save patches done
-2022-04-15 20:34:19,863 DEBUG   HandlerThread:6840 [meta.py:_save_pip():57] save pip
-2022-04-15 20:34:19,864 DEBUG   HandlerThread:6840 [meta.py:_save_pip():71] save pip done
-2022-04-15 20:34:19,864 DEBUG   HandlerThread:6840 [meta.py:_save_conda():78] save conda
-2022-04-15 20:34:20,514 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/wandb-summary.json
-2022-04-15 20:34:20,514 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/conda-environment.yaml
-2022-04-15 20:34:20,514 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/code/train_translation.py
-2022-04-15 20:34:20,515 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/diff.patch
-2022-04-15 20:34:20,515 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/requirements.txt
-2022-04-15 20:34:20,515 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/code
-2022-04-15 20:34:24,109 DEBUG   HandlerThread:6840 [meta.py:_save_conda():86] save conda done
-2022-04-15 20:34:24,109 DEBUG   HandlerThread:6840 [meta.py:probe():252] probe done
-2022-04-15 20:34:24,112 DEBUG   SenderThread:6840 [sender.py:send():179] send: files
-2022-04-15 20:34:24,112 INFO    SenderThread:6840 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-15 20:34:24,112 INFO    SenderThread:6840 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-15 20:34:24,113 INFO    SenderThread:6840 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-15 20:34:24,142 DEBUG   HandlerThread:6840 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-15 20:34:24,142 DEBUG   SenderThread:6840 [sender.py:send_request():193] send_request: stop_status
-2022-04-15 20:34:24,154 INFO    MainThread:6840 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-15 20:34:24,154 INFO    MainThread:6840 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-15 20:34:24,155 INFO    MainThread:6840 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-15 20:34:24,227 INFO    MainThread:6840 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-15 20:34:24,227 INFO    MainThread:6840 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-15 20:34:24,227 INFO    MainThread:6840 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
-2022-04-15 20:34:24,513 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/conda-environment.yaml
-2022-04-15 20:34:24,513 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/wandb-metadata.json
-2022-04-15 20:34:24,514 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/output.log
-2022-04-15 20:34:24,850 DEBUG   SenderThread:6840 [sender.py:send():179] send: config
-2022-04-15 20:34:25,811 INFO    Thread-14 :6840 [upload_job.py:push():133] Uploaded file /tmp/tmpdyry0x9pwandb/w18tghfd-wandb-metadata.json
-2022-04-15 20:34:25,876 INFO    Thread-17 :6840 [upload_job.py:push():133] Uploaded file /tmp/tmpdyry0x9pwandb/22s9hrau-code/train_translation.py
-2022-04-15 20:34:26,514 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/config.yaml
-2022-04-15 20:34:26,514 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/output.log
-2022-04-15 20:34:26,625 INFO    Thread-19 :6840 [upload_job.py:push():133] Uploaded file /tmp/tmpdyry0x9pwandb/1n34jtgp-diff.patch
-2022-04-15 20:34:28,518 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/output.log
-2022-04-15 20:34:32,520 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/output.log
-2022-04-15 20:34:33,257 INFO    SenderThread:6840 [sender.py:finish():933] shutting down sender
-2022-04-15 20:34:33,257 INFO    SenderThread:6840 [dir_watcher.py:finish():282] shutting down directory watcher
-2022-04-15 20:34:33,257 INFO    WriterThread:6840 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/run-2injabwk.wandb
-2022-04-15 20:34:33,520 INFO    SenderThread:6840 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files
-2022-04-15 20:34:33,521 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/requirements.txt requirements.txt
-2022-04-15 20:34:33,521 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/wandb-metadata.json wandb-metadata.json
-2022-04-15 20:34:33,521 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/output.log output.log
-2022-04-15 20:34:33,521 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/conda-environment.yaml conda-environment.yaml
-2022-04-15 20:34:33,521 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/wandb-summary.json wandb-summary.json
-2022-04-15 20:34:33,521 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/config.yaml config.yaml
-2022-04-15 20:34:33,521 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/diff.patch diff.patch
-2022-04-15 20:34:33,522 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/code/train_translation.py code/train_translation.py
-2022-04-15 20:34:33,522 INFO    SenderThread:6840 [file_pusher.py:finish():176] shutting down file pusher
-2022-04-15 20:34:33,522 INFO    SenderThread:6840 [file_pusher.py:join():181] waiting for file pusher
-2022-04-15 20:34:35,046 INFO    Thread-24 :6840 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/output.log
-2022-04-15 20:34:35,048 INFO    Thread-27 :6840 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/config.yaml
-2022-04-15 20:34:35,101 INFO    Thread-25 :6840 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/conda-environment.yaml
-2022-04-15 20:34:35,453 INFO    Thread-26 :6840 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/wandb-summary.json
-2022-04-15 20:34:35,455 INFO    Thread-23 :6840 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/requirements.txt
-2022-04-15 20:34:36,378 ERROR   wandb_internal:6840 [internal.py:wandb_internal():159] Thread HandlerThread:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
-    self._run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
-    record = self._input_record_q.get(timeout=1)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
-    res = self._recv_bytes()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
-    buf = self._recv_bytes(maxlength)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
-    buf = self._recv(4)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
-    raise EOFError
-EOFError
-2022-04-15 20:46:13,288 INFO    MainThread:6840 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
-2022-04-15 20:46:13,288 INFO    MainThread:6840 [wandb_run.py:_restore():1480] restore
-2022-04-15 20:46:14,033 INFO    MainThread:6840 [wandb_run.py:_restore():1480] restore
-2022-04-15 20:46:14,036 INFO    MainThread:6840 [internal.py:handle_exit():78] Internal process exited
diff --git a/wandb/run-20220415_203417-2injabwk/logs/debug.log b/wandb/run-20220415_203417-2injabwk/logs/debug.log
deleted file mode 100644
index d999a97..0000000
--- a/wandb/run-20220415_203417-2injabwk/logs/debug.log
+++ /dev/null
@@ -1,85 +0,0 @@
-2022-04-15 20:34:17,868 INFO    MainThread:6840 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
-2022-04-15 20:34:17,868 INFO    MainThread:6840 [wandb_setup.py:_flush():69] setting login settings: {}
-2022-04-15 20:34:17,868 INFO    MainThread:6840 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/logs/debug.log
-2022-04-15 20:34:17,868 INFO    MainThread:6840 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/logs/debug-internal.log
-2022-04-15 20:34:17,868 INFO    MainThread:6840 [wandb_init.py:init():369] calling init triggers
-2022-04-15 20:34:17,869 INFO    MainThread:6840 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
-config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
-2022-04-15 20:34:17,869 INFO    MainThread:6840 [wandb_init.py:init():418] starting backend
-2022-04-15 20:34:17,879 INFO    MainThread:6840 [backend.py:ensure_launched():132] starting backend process...
-2022-04-15 20:34:17,893 INFO    MainThread:6840 [backend.py:ensure_launched():137] started backend process with pid: 0
-2022-04-15 20:34:17,894 INFO    wandb_internal:6840 [internal.py:wandb_internal():91] W&B internal server running at pid: 6840, started at: 2022-04-15 20:34:17.893635
-2022-04-15 20:34:17,917 INFO    MainThread:6840 [wandb_init.py:init():423] backend started and connected
-2022-04-15 20:34:17,919 INFO    MainThread:6840 [wandb_init.py:init():465] updated telemetry
-2022-04-15 20:34:17,937 INFO    MainThread:6840 [wandb_init.py:init():484] communicating current version
-2022-04-15 20:34:17,938 INFO    WriterThread:6840 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/run-2injabwk.wandb
-2022-04-15 20:34:18,241 INFO    MainThread:6840 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-15 20:34:18,242 INFO    MainThread:6840 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-15 20:34:19,461 INFO    MainThread:6840 [wandb_init.py:init():522] starting run threads in backend
-2022-04-15 20:34:19,521 INFO    SenderThread:6840 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files
-2022-04-15 20:34:19,522 INFO    SenderThread:6840 [sender.py:_start_run_threads():707] run started: 2injabwk with start time 1650035057
-2022-04-15 20:34:19,523 INFO    SenderThread:6840 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-15 20:34:20,514 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/wandb-summary.json
-2022-04-15 20:34:20,514 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/conda-environment.yaml
-2022-04-15 20:34:20,514 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/code/train_translation.py
-2022-04-15 20:34:20,515 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/diff.patch
-2022-04-15 20:34:20,515 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/requirements.txt
-2022-04-15 20:34:20,515 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/code
-2022-04-15 20:34:24,112 INFO    SenderThread:6840 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-15 20:34:24,112 INFO    SenderThread:6840 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-15 20:34:24,113 INFO    SenderThread:6840 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-15 20:34:24,154 INFO    MainThread:6840 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-15 20:34:24,154 INFO    MainThread:6840 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-15 20:34:24,155 INFO    MainThread:6840 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-15 20:34:24,227 INFO    MainThread:6840 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-15 20:34:24,227 INFO    MainThread:6840 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-15 20:34:24,227 INFO    MainThread:6840 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
-2022-04-15 20:34:24,513 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/conda-environment.yaml
-2022-04-15 20:34:24,513 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/wandb-metadata.json
-2022-04-15 20:34:24,514 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/output.log
-2022-04-15 20:34:25,811 INFO    Thread-14 :6840 [upload_job.py:push():133] Uploaded file /tmp/tmpdyry0x9pwandb/w18tghfd-wandb-metadata.json
-2022-04-15 20:34:25,876 INFO    Thread-17 :6840 [upload_job.py:push():133] Uploaded file /tmp/tmpdyry0x9pwandb/22s9hrau-code/train_translation.py
-2022-04-15 20:34:26,514 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/config.yaml
-2022-04-15 20:34:26,514 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/output.log
-2022-04-15 20:34:26,625 INFO    Thread-19 :6840 [upload_job.py:push():133] Uploaded file /tmp/tmpdyry0x9pwandb/1n34jtgp-diff.patch
-2022-04-15 20:34:28,518 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/output.log
-2022-04-15 20:34:32,520 INFO    Thread-12 :6840 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/output.log
-2022-04-15 20:34:33,257 INFO    SenderThread:6840 [sender.py:finish():933] shutting down sender
-2022-04-15 20:34:33,257 INFO    SenderThread:6840 [dir_watcher.py:finish():282] shutting down directory watcher
-2022-04-15 20:34:33,257 INFO    WriterThread:6840 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/run-2injabwk.wandb
-2022-04-15 20:34:33,520 INFO    SenderThread:6840 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files
-2022-04-15 20:34:33,521 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/requirements.txt requirements.txt
-2022-04-15 20:34:33,521 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/wandb-metadata.json wandb-metadata.json
-2022-04-15 20:34:33,521 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/output.log output.log
-2022-04-15 20:34:33,521 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/conda-environment.yaml conda-environment.yaml
-2022-04-15 20:34:33,521 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/wandb-summary.json wandb-summary.json
-2022-04-15 20:34:33,521 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/config.yaml config.yaml
-2022-04-15 20:34:33,521 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/diff.patch diff.patch
-2022-04-15 20:34:33,522 INFO    SenderThread:6840 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/code/train_translation.py code/train_translation.py
-2022-04-15 20:34:33,522 INFO    SenderThread:6840 [file_pusher.py:finish():176] shutting down file pusher
-2022-04-15 20:34:33,522 INFO    SenderThread:6840 [file_pusher.py:join():181] waiting for file pusher
-2022-04-15 20:34:35,046 INFO    Thread-24 :6840 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/output.log
-2022-04-15 20:34:35,048 INFO    Thread-27 :6840 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/config.yaml
-2022-04-15 20:34:35,101 INFO    Thread-25 :6840 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/conda-environment.yaml
-2022-04-15 20:34:35,453 INFO    Thread-26 :6840 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/wandb-summary.json
-2022-04-15 20:34:35,455 INFO    Thread-23 :6840 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220415_203417-2injabwk/files/requirements.txt
-2022-04-15 20:34:36,378 ERROR   wandb_internal:6840 [internal.py:wandb_internal():159] Thread HandlerThread:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
-    self._run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
-    record = self._input_record_q.get(timeout=1)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
-    res = self._recv_bytes()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
-    buf = self._recv_bytes(maxlength)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
-    buf = self._recv(4)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
-    raise EOFError
-EOFError
-2022-04-15 20:46:13,288 INFO    MainThread:6840 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
-2022-04-15 20:46:13,288 INFO    MainThread:6840 [wandb_run.py:_restore():1480] restore
-2022-04-15 20:46:14,033 INFO    MainThread:6840 [wandb_run.py:_restore():1480] restore
-2022-04-15 20:46:14,036 INFO    MainThread:6840 [internal.py:handle_exit():78] Internal process exited
diff --git a/wandb/run-20220415_203417-2injabwk/run-2injabwk.wandb b/wandb/run-20220415_203417-2injabwk/run-2injabwk.wandb
deleted file mode 100644
index 3ae463a..0000000
Binary files a/wandb/run-20220415_203417-2injabwk/run-2injabwk.wandb and /dev/null differ
diff --git a/wandb/run-20220416_013009-2m8v6ch7/files/code/train_translation.py b/wandb/run-20220416_013009-2m8v6ch7/files/code/train_translation.py
deleted file mode 100644
index 49b1b0a..0000000
--- a/wandb/run-20220416_013009-2m8v6ch7/files/code/train_translation.py
+++ /dev/null
@@ -1,402 +0,0 @@
-import numpy as np
-from pathlib import Path
-import argparse
-import json
-import math
-import os
-import random
-import signal
-import subprocess
-import sys
-import time
-
-import torch
-from torch import nn, optim 
-from torch.nn import Transformer 
-import torchtext
-import t_dataset
-from t_dataset import  Translation_dataset_t
-from t_dataset import  MyCollate
-import translation_utils 
-from translation_utils import TokenEmbedding, PositionalEncoding 
-from translation_utils import create_mask
-from transformers import BertModel 
-from transformers import AutoTokenizer
-from torch import Tensor
-from torchtext.data.metrics import bleu_score
-from models import Translator
-from models import BarlowTwins
-
-import wandb 
-
-
-#import barlow
-os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
-os.environ['WANDB_START_METHOD'] = 'thread'
-os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-
-MANUAL_SEED = 4444
-
-random.seed(MANUAL_SEED)
-np.random.seed(MANUAL_SEED)
-torch.manual_seed(MANUAL_SEED)
-torch.backends.cudnn.deterministic = True
-
-
-parser = argparse.ArgumentParser(description = 'Translation') 
-
-# Training hyper-parameters: 
-parser.add_argument('--workers', default=4, type=int, metavar='N', 
-                    help='number of data loader workers') 
-parser.add_argument('--epochs', default=5, type=int, metavar='N',
-                    help='number of total epochs to run')
-parser.add_argument('--batch_size', default=16, type=int, metavar='n',
-                    help='mini-batch size')
-parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
-                    help='base learning rate')
-parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
-                    help='dropout for training translation transformer')
-parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
-                    help='weight decay')
-parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
-                    help='momentum for sgd')
-parser.add_argument('--clip', default=1, type=float, metavar='GC',
-                    help='Gradient Clipping')
-parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
-                    help='betas for Adam Optimizer')
-parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
-                    help='eps for Adam optimizer')
-parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
-                    help='loss function for translation')
-parser.add_argument('--optimizer', default='adam', type=str, metavar='OP',
-                    help='selecting optimizer')
-
-# Transformer parameters: 
-parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
-                    help='dimension of transformer encoder')
-parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                    help= 'number of heads in transformer') 
-parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                    help= 'dimension of feedforward layer in transformer encoder') 
-parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                   help='number of layers of transformer encoder') 
-parser.add_argument('--projector', default='768-256', type=str,
-                    metavar='MLP', help='projector MLP')
-
-# Tokenizer: 
-parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
-                metavar='T', help= 'tokenizer')
-parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
-                    help='Dimension of mbert output')
-# Paths: 
-parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
-                    metavar='DIR', help='path to checkpoint directory')
-
-# to load or barlow or not: 
-parser.add_argument('--load', default=0, type=int,
-                    metavar='DIR', help='to load barlow twins encoder or not')
-
-# calculate bleu: 
-parser.add_argument('--checkbleu', default=5 , type=int,
-                    metavar='BL', help='check bleu after these number of epochs')
-# train or test dataset
-parser.add_argument('--train', default=True , type=bool,
-                    metavar='T', help='selecting train set')
-
-parser.add_argument('--print_freq', default=5 , type=int,
-                    metavar='PF', help='frequency of printing and saving stats')
-
-parser.add_argument('--test_translation', default=0, type=int, 
-                    metavar='TT', help='testing translation_score')
-''' NOTE: 
-        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-'''
-
-args = parser.parse_args()
-# print(args.load)
-os.environ["TOKENIZERS_PARALLELISM"] = "true"
-
-def main(): 
-
-    # print("entered main")
-    args.ngpus_per_node = torch.cuda.device_count()
-    if 'SLURM_JOB_ID' in os.environ:
-        # single-node and multi-node distributed training on SLURM cluster
-        # requeue job on SLURM preemption
-        signal.signal(signal.SIGUSR1, handle_sigusr1)
-        signal.signal(signal.SIGTERM, handle_sigterm)
-        # find a common host name on all nodes
-        # assume scontrol returns hosts in the same order on all nodes
-        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
-        stdout = subprocess.check_output(cmd.split())
-        host_name = stdout.decode().splitlines()[0]
-        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
-        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
-        args.dist_url = f'tcp://{host_name}:58472'
-    else:
-        # single-node distributed training
-        args.rank = 0
-        args.dist_url = 'tcp://localhost:58472'
-        args.world_size = args.ngpus_per_node
-    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
-
-
-def main_worker(gpu, args):
-    
-    args.rank += gpu
-    torch.distributed.init_process_group(
-        backend='nccl', init_method=args.dist_url,
-        world_size=args.world_size, rank=args.rank)
-
-    if args.rank == 0:
-
-        wandb.init(config=args, project='translation_test')#############################################
-        wandb.config.update(args)
-        config = wandb.config
-    
-        # exit()
-        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
-        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
-        print(' '.join(sys.argv))
-        print(' '.join(sys.argv), file=stats_file)
-
-    torch.cuda.set_device(gpu)
-    torch.backends.cudnn.benchmark = True
-
-    dataset = Translation_dataset_t(train=args.train) 
-    src_vocab_size = dataset.de_vocab_size
-    trg_vocab_size = dataset.en_vocab_size
-    tokenizer = dataset.tokenizer  
-    pad_idx = tokenizer.pad_token_id
-    sos_idx = tokenizer.cls_token_id 
-    eos_idx = tokenizer.sep_token_id
-
-#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
-    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
-    # print(src_vocab_size, trg_vocab_size)
-    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
-    transformer = Transformer(d_model=args.dmodel, 
-                              nhead=args.nhead, 
-                              num_encoder_layers=args.nlayers, 
-                              num_decoder_layers = args.nlayers, 
-                              dim_feedforward=args.dfeedforward, 
-                              dropout=args.dropout)
-    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
-    # print(model.state_dict)
-#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
-
-    # args.load = False
-
-    if args.load == 1: 
-        # print(args.load)
-        # print('inside')
-        print('loading barlow model')
-        t_enc = model.transformer.encoder
-        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
-        ### note: lambd is just a placeholder
-        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
-                            map_location='cpu')
-        barlow.load_state_dict(ckpt['model'])
-        model.transformer.encoder = barlow.transformer_enc        
-        model.mbert = barlow.mbert
-    '''
-    to_do: 
-    if post_train: 
-        torch.load(model.states_dict)
-        model.transformer.encoder = model_barlow
-
-    '''
-#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
-
-    param_weights = []
-    param_biases = []
-    for param in model.parameters():
-        if param.ndim == 1:
-            param_biases.append(param)
-        else:
-            param_weights.append(param)
-    parameters = [{'params': param_weights}, {'params': param_biases}]
-    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
-
-###########################################################
-    if args.optimizer == 'adam':
-        optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
-    else: 
-        optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) 
-    
-    if args.loss_fn == 'cross_entropy': 
-        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
-##############################################################
-
-    start_epoch = 0 
-
-    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
-
-    assert args.batch_size % args.world_size == 0
-    per_device_batch_size = args.batch_size // args.world_size
-    id2bert_dict = dataset.id2bert_dict
-    ###############################
-    loader = torch.utils.data.DataLoader(
-         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-   
-    test_loader = torch.utils.data.DataLoader(
-         dataset, batch_size=1, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-    #############################
-    start_time = time.time()
-
-
-    if not args.test_translation: 
-
-        for epoch in range(start_epoch, args.epochs):
-            sampler.set_epoch(epoch)
-            epoch_loss = 0 
-            t = 0 
-            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
-                src = sent[0].cuda(gpu, non_blocking=True)
-                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
-                tgt_out = sent[3].cuda(gpu, non_blocking=True)
-                
-                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
-                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
-                
-                optimizer.zero_grad()
-
-                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
-                loss.backward()
-
-                optimizer.step()
-                # losses += loss.item()
-                
-#                wandb.log({'iter_loss': loss})
-                epoch_loss += loss.item()
-                t += 1 
-                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-                
-                if step % args.print_freq == 0:
-                    if args.rank == 0:
-                        stats = dict(epoch=epoch, step=step,
-                                    loss=loss.item(),
-                                    time=int(time.time() - start_time))
-                        print(json.dumps(stats))
-                        print(json.dumps(stats), file=stats_file)
-            if args.rank == 0:
-
-                wandb.log({"epoch_loss":epoch_loss/t})
-                # save checkpoint
-                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
-                            optimizer=optimizer.state_dict())
-                # print(model.state_dict)
-                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
-                print('translation model saved in', args.checkpoint_dir)
-            
-    ##############################################################
-            if args.rank == 0: 
-                if epoch%args.checkbleu ==0 : 
-
-                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                    wandb.log({'bleu_score': bleu_score}) 
-    #            print(bleu_score(predicted, target))
-    ##############################################################
-    #        if epoch%1 ==0 : 
-    #            torch.save(model.module.state_dict(),
-    #                   'path.pth')
-    #            print("Model is saved")
-            # if args.rank == 0:
-            #     # save checkpoint
-            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
-            #                  optimizer=optimizer.state_dict())
-            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
-            #     print('saved translation model in', args.checkpoint_dir)
-        wandb.finish()
-            
-    else: 
-
-        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-        print('test_bleu_score', bleu_score)
-        if args.rank == 0: 
-            wandb.log({'bleu_score': bleu_score})
-
-
-def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
-
-    model.eval()
-    predicted=[]
-    target=[]
-            
-    for i in test_loader: 
-        src = i[0].cuda(gpu, non_blocking=True)
-#        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-        tgt_out = i[3].cuda(gpu, non_blocking=True)
-        num_tokens = src.shape[0]
-
-        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
-        out = translate(model, src, tokenizer, src_mask, id2bert_dict, gpu)
-        predicted.append(out)
-        for i in range(len(tgt_out)): 
-            tgt_out[i] = id2bert_dict[tgt_out[i].item()]
-        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-
-                
-        try: 
-            bleu_score(predicted, target)
-        except: 
-            predicted.pop()
-            target.pop()
-            
-        bleu = bleu_score(predicted, target)
-
-    return bleu
-
-'''
-todo: 
-    BLEU score
-'''
-
-# function to generate output sequence using greedy algorithm 
-def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
-    src = src
-    src_mask = src_mask
-
-    memory = model.module.encode(src, src_mask)
-    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
-    for i in range(max_len-1):
-        memory = memory
-        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
-                    .type(torch.bool)).cuda(gpu, non_blocking=True)
-        out = model.module.decode(ys, memory, tgt_mask)
-        out = out.transpose(0, 1)
-        prob = model.module.generator(out[:, -1])
-        _, next_word = torch.max(prob, dim=1)
-        next_word = next_word.item()
-
-        ys = torch.cat([ys,
-                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
-        if next_word == eos_idx:
-            break
-    return ys
-
-
-# actual function to translate input sentence into target language
-def translate(model: torch.nn.Module, 
-        src: torch.tensor, 
-        tokenizer,src_mask, id2bert_dict, gpu):
-    model.eval()
-    
-    num_tokens = src.shape[0]
-    
-    
-    tgt_tokens = greedy_decode(
-        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-    
-    for i in range(len(tgt_tokens)): 
-        tgt_tokens[i] = id2bert_dict[tgt_tokens[i].item()]
-#    print(tgt_tokens)
-
-    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
-
-
-if __name__ == '__main__': 
-    main()
-    wandb.finish()
diff --git a/wandb/run-20220416_013009-2m8v6ch7/files/conda-environment.yaml b/wandb/run-20220416_013009-2m8v6ch7/files/conda-environment.yaml
deleted file mode 100644
index fd74d2b..0000000
--- a/wandb/run-20220416_013009-2m8v6ch7/files/conda-environment.yaml
+++ /dev/null
@@ -1,158 +0,0 @@
-name: ectc
-channels:
-  - pytorch
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=4.5=1_gnu
-  - blas=1.0=mkl
-  - brotlipy=0.7.0=py37h27cfd23_1003
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2022.3.18=h06a4308_0
-  - certifi=2021.10.8=py37h06a4308_2
-  - cffi=1.15.0=py37hd667e15_1
-  - cryptography=36.0.0=py37h9ce1e76_0
-  - cudatoolkit=11.3.1=h2bc3f7f_2
-  - ffmpeg=4.3=hf484d3e_0
-  - freetype=2.11.0=h70c0345_0
-  - giflib=5.2.1=h7b6447c_0
-  - gmp=6.2.1=h2531618_2
-  - gnutls=3.6.15=he1e5248_0
-  - idna=3.3=pyhd3eb1b0_0
-  - intel-openmp=2021.4.0=h06a4308_3561
-  - jpeg=9d=h7f8727e_0
-  - lame=3.100=h7b6447c_0
-  - lcms2=2.12=h3be6417_0
-  - ld_impl_linux-64=2.35.1=h7274673_9
-  - libffi=3.3=he6710b0_2
-  - libgcc-ng=9.3.0=h5101ec6_17
-  - libgomp=9.3.0=h5101ec6_17
-  - libiconv=1.15=h63c8f33_5
-  - libidn2=2.3.2=h7f8727e_0
-  - libpng=1.6.37=hbc83047_0
-  - libstdcxx-ng=9.3.0=hd4cf53a_17
-  - libtasn1=4.16.0=h27cfd23_0
-  - libtiff=4.2.0=h85742a9_0
-  - libunistring=0.9.10=h27cfd23_0
-  - libuv=1.40.0=h7b6447c_0
-  - libwebp=1.2.2=h55f646e_0
-  - libwebp-base=1.2.2=h7f8727e_0
-  - lz4-c=1.9.3=h295c915_1
-  - mkl=2021.4.0=h06a4308_640
-  - mkl-service=2.4.0=py37h7f8727e_0
-  - mkl_fft=1.3.1=py37hd3c417c_0
-  - mkl_random=1.2.2=py37h51133e4_0
-  - ncurses=6.3=h7f8727e_2
-  - nettle=3.7.3=hbbd107a_1
-  - numpy-base=1.21.2=py37h79a1101_0
-  - openh264=2.1.1=h4ff587b_0
-  - openssl=1.1.1n=h7f8727e_0
-  - pip=21.2.2=py37h06a4308_0
-  - pycparser=2.21=pyhd3eb1b0_0
-  - pyopenssl=22.0.0=pyhd3eb1b0_0
-  - pysocks=1.7.1=py37_1
-  - python=3.7.11=h12debd9_0
-  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
-  - pytorch-mutex=1.0=cuda
-  - readline=8.1.2=h7f8727e_1
-  - requests=2.27.1=pyhd3eb1b0_0
-  - setuptools=58.0.4=py37h06a4308_0
-  - six=1.16.0=pyhd3eb1b0_1
-  - sqlite=3.38.0=hc218d9a_0
-  - tk=8.6.11=h1ccaba5_0
-  - torchaudio=0.11.0=py37_cu113
-  - typing_extensions=4.1.1=pyh06a4308_0
-  - wheel=0.37.1=pyhd3eb1b0_0
-  - xz=5.2.5=h7b6447c_0
-  - zlib=1.2.11=h7f8727e_4
-  - zstd=1.4.9=haebb681_0
-  - pip:
-    - aiohttp==3.8.1
-    - aiosignal==1.2.0
-    - antlr4-python3-runtime==4.8
-    - async-timeout==4.0.2
-    - asynctest==0.13.0
-    - attrs==21.4.0
-    - backcall==0.2.0
-    - bitarray==2.4.1
-    - blessings==1.7
-    - charset-normalizer==2.0.12
-    - click==8.0.4
-    - colorama==0.4.4
-    - configparser==5.2.0
-    - cython==0.29.28
-    - datasets==1.16.1
-    - debugpy==1.6.0
-    - decorator==5.1.1
-    - dill==0.3.4
-    - docker-pycreds==0.4.0
-    - entrypoints==0.4
-    - fastbpe==0.1.0
-    - filelock==3.6.0
-    - frozenlist==1.3.0
-    - fsspec==2022.2.0
-    - gitdb==4.0.9
-    - gitpython==3.1.27
-    - gpustat==0.6.0
-    - huggingface-hub==0.4.0
-    - hydra-core==1.0.7
-    - importlib-metadata==4.11.3
-    - importlib-resources==5.6.0
-    - ipykernel==6.12.1
-    - ipython==7.32.0
-    - jedi==0.18.1
-    - joblib==1.1.0
-    - jupyter-client==7.2.2
-    - jupyter-core==4.9.2
-    - matplotlib-inline==0.1.3
-    - mock==4.0.3
-    - multidict==6.0.2
-    - multiprocess==0.70.12.2
-    - nest-asyncio==1.5.5
-    - numpy==1.21.5
-    - nvidia-ml-py3==7.352.0
-    - omegaconf==2.0.6
-    - packaging==21.3
-    - pandas==1.3.5
-    - parso==0.8.3
-    - pathtools==0.1.2
-    - pexpect==4.8.0
-    - pickleshare==0.7.5
-    - pillow==9.0.1
-    - portalocker==2.4.0
-    - promise==2.3
-    - prompt-toolkit==3.0.29
-    - protobuf==3.19.4
-    - psutil==5.9.0
-    - ptyprocess==0.7.0
-    - pyarrow==7.0.0
-    - pygments==2.11.2
-    - pyparsing==3.0.7
-    - python-dateutil==2.8.2
-    - pytz==2022.1
-    - pyyaml==6.0
-    - pyzmq==22.3.0
-    - regex==2022.3.15
-    - sacrebleu==2.0.0
-    - sacremoses==0.0.49
-    - sentry-sdk==1.5.8
-    - shortuuid==1.0.8
-    - smmap==5.0.0
-    - subprocess32==3.5.4
-    - subword-nmt==0.3.8
-    - tabulate==0.8.9
-    - tokenizers==0.10.3
-    - torch==1.11.0
-    - torchtext==0.12.0
-    - torchvision==0.9.1
-    - tornado==6.1
-    - tqdm==4.63.1
-    - traitlets==5.1.1
-    - transformers==4.14.1
-    - urllib3==1.26.9
-    - wandb==0.10.31
-    - wcwidth==0.2.5
-    - xxhash==3.0.0
-    - yarl==1.7.2
-    - zipp==3.7.0
-prefix: /home/ivlabs/miniconda3/envs/ectc
diff --git a/wandb/run-20220416_013009-2m8v6ch7/files/config.yaml b/wandb/run-20220416_013009-2m8v6ch7/files/config.yaml
deleted file mode 100644
index 4458c44..0000000
--- a/wandb/run-20220416_013009-2m8v6ch7/files/config.yaml
+++ /dev/null
@@ -1,115 +0,0 @@
-wandb_version: 1
-
-_wandb:
-  desc: null
-  value:
-    cli_version: 0.10.31
-    code_path: code/train_translation.py
-    framework: huggingface
-    huggingface_version: 4.14.1
-    is_jupyter_run: false
-    is_kaggle_kernel: false
-    python_version: 3.7.11
-    t:
-      1:
-      - 1
-      - 11
-      2:
-      - 1
-      - 11
-      3:
-      - 2
-      4: 3.7.11
-      5: 0.10.31
-      6: 4.14.1
-      8:
-      - 8
-batch_size:
-  desc: null
-  value: 16
-betas:
-  desc: null
-  value:
-  - 0.9
-  - 0.98
-checkbleu:
-  desc: null
-  value: 5
-checkpoint_dir:
-  desc: null
-  value: checkpoint
-clip:
-  desc: null
-  value: 1
-dfeedforward:
-  desc: null
-  value: 200
-dist_url:
-  desc: null
-  value: tcp://localhost:58472
-dmodel:
-  desc: null
-  value: 768
-dropout:
-  desc: null
-  value: 0.01
-epochs:
-  desc: null
-  value: 5
-eps:
-  desc: null
-  value: 1.0e-09
-learning_rate:
-  desc: null
-  value: 0.2
-load:
-  desc: null
-  value: 0
-loss_fn:
-  desc: null
-  value: cross_entropy
-mbert_out_size:
-  desc: null
-  value: 768
-momentum:
-  desc: null
-  value: 0.9
-ngpus_per_node:
-  desc: null
-  value: 2
-nhead:
-  desc: null
-  value: 4
-nlayers:
-  desc: null
-  value: 3
-optimizer:
-  desc: null
-  value: adam
-print_freq:
-  desc: null
-  value: 5
-projector:
-  desc: null
-  value: 768-256
-rank:
-  desc: null
-  value: 0
-test_translation:
-  desc: null
-  value: 0
-tokenizer:
-  desc: null
-  value: bert-base-multilingual-uncased
-train:
-  desc: null
-  value: true
-weight_decay:
-  desc: null
-  value: 1.0e-06
-workers:
-  desc: null
-  value: 4
-world_size:
-  desc: null
-  value: 2
diff --git a/wandb/run-20220416_013009-2m8v6ch7/files/diff.patch b/wandb/run-20220416_013009-2m8v6ch7/files/diff.patch
deleted file mode 100644
index 33f52c4..0000000
--- a/wandb/run-20220416_013009-2m8v6ch7/files/diff.patch
+++ /dev/null
@@ -1,30763 +0,0 @@
-diff --git a/__pycache__/barlow_utils.cpython-37.pyc b/__pycache__/barlow_utils.cpython-37.pyc
-index 3c0d4fe..b13b62f 100644
-Binary files a/__pycache__/barlow_utils.cpython-37.pyc and b/__pycache__/barlow_utils.cpython-37.pyc differ
-diff --git a/__pycache__/models.cpython-37.pyc b/__pycache__/models.cpython-37.pyc
-index 3bbb9de..acc1737 100644
-Binary files a/__pycache__/models.cpython-37.pyc and b/__pycache__/models.cpython-37.pyc differ
-diff --git a/__pycache__/t_dataset.cpython-37.pyc b/__pycache__/t_dataset.cpython-37.pyc
-index 2650733..c4b566b 100644
-Binary files a/__pycache__/t_dataset.cpython-37.pyc and b/__pycache__/t_dataset.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-37.pyc b/__pycache__/translation_utils.cpython-37.pyc
-index 60c9eda..12c22a5 100644
-Binary files a/__pycache__/translation_utils.cpython-37.pyc and b/__pycache__/translation_utils.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-38.pyc b/__pycache__/translation_utils.cpython-38.pyc
-index 061d0e7..a1e7877 100644
-Binary files a/__pycache__/translation_utils.cpython-38.pyc and b/__pycache__/translation_utils.cpython-38.pyc differ
-diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
-index 884dd9c..2b00de1 100644
---- a/checkpoint/stats.txt
-+++ b/checkpoint/stats.txt
-@@ -833,3 +833,160 @@ train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 -
- {"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
- {"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
- {"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 4}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 5}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 5}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 6}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 7}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 7}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 8}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 8}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 9}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 8}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 65}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 178}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 15}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 72}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 128}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 183}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 239}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 295}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 351}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 407}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 463}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 19}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 104}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 188}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 355}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 606}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 690}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.121065616607666, "time": 9}
-+{"epoch": 0, "step": 5, "loss": 97.44178771972656, "time": 10}
-+{"epoch": 0, "step": 10, "loss": 168.33328247070312, "time": 12}
-+{"epoch": 0, "step": 15, "loss": 133.17933654785156, "time": 12}
-+{"epoch": 0, "step": 20, "loss": 112.3768539428711, "time": 13}
-+{"epoch": 0, "step": 25, "loss": 120.29653930664062, "time": 14}
-+{"epoch": 0, "step": 30, "loss": 119.97941589355469, "time": 15}
-+{"epoch": 0, "step": 35, "loss": 86.40515899658203, "time": 16}
-+{"epoch": 0, "step": 40, "loss": 70.5906982421875, "time": 17}
-+train_translation.py
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 28}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 155}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 281}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 405}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 530}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 657}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 783}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 908}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 1033}
-+train_translation.py
-+train_translation.py
-+train_translation.py
-+train_translation.py --load=1
-+train_translation.py --load=1
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 9}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 65}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 178}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 9}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 66}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 179}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 16}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 72}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 128}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 184}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 240}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 296}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 352}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 408}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 464}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 20}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 273}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 356}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 440}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 524}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 608}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 692}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 20}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 356}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 607}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 691}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 20}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 188}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 356}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 607}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 690}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 21}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 273}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 357}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 440}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 524}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 608}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 691}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 21}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 106}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 273}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 357}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 441}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 524}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 608}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 691}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.128603458404541, "time": 19}
-+{"epoch": 0, "step": 5, "loss": 156.04449462890625, "time": 104}
-+{"epoch": 0, "step": 10, "loss": 154.7353515625, "time": 188}
-diff --git a/t_dataset.py b/t_dataset.py
-index c7ab181..53d5caa 100644
---- a/t_dataset.py
-+++ b/t_dataset.py
-@@ -20,19 +20,19 @@ class Translation_dataset_t(Dataset):
-             split = "train" 
-         else: 
-             split = "test" 
--        self.dataset = load_dataset('wmt14', "de-en", split=split) 
-+        self.dataset = load_dataset('opus_rf', "de-en", split=split) 
-         self.de_list = []
-         self.en_list = []
- #        self.tokenizer = tokenizer
-         self.tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-uncased')
--        dataset = load_dataset('opus_rf', 'de-en', split='train')
-         en_list_2 = []
--        for n, i in enumerate(dataset): 
-+        for n, i in enumerate(self.dataset): 
-             en_list_2.append(i['translation']['en'].lower())
- 
-         a1 = list(self.tokenizer(en_list_2, padding=True, return_tensors='pt')['input_ids'])
-         self.en_vocab, self.en_vocab_size = vocab(a1)
-         self.bert2id_dict = translation_utils.bert2id(self.en_vocab)
-+        self.id2bert_dict = translation_utils.id2bert(self.en_vocab)
-         
-         for i in self.dataset: 
-             self.de_list.append(self.tokenizer(i['translation']['de'].lower(), 
-diff --git a/train_translation.py b/train_translation.py
-index eea074a..49b1b0a 100644
---- a/train_translation.py
-+++ b/train_translation.py
-@@ -33,6 +33,7 @@ import wandb
- #import barlow
- os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
- os.environ['WANDB_START_METHOD'] = 'thread'
-+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
- 
- MANUAL_SEED = 4444
- 
-@@ -49,7 +50,7 @@ parser.add_argument('--workers', default=4, type=int, metavar='N',
-                     help='number of data loader workers') 
- parser.add_argument('--epochs', default=5, type=int, metavar='N',
-                     help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
-+parser.add_argument('--batch_size', default=16, type=int, metavar='n',
-                     help='mini-batch size')
- parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
-                     help='base learning rate')
-@@ -75,9 +76,9 @@ parser.add_argument('--dmodel', default=768, type=int, metavar='T',
-                     help='dimension of transformer encoder')
- parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                     help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=500, type=int, metavar='F', 
-+parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                     help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=8, type=int, metavar= 'N', 
-+parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                    help='number of layers of transformer encoder') 
- parser.add_argument('--projector', default='768-256', type=str,
-                     metavar='MLP', help='projector MLP')
-@@ -233,6 +234,7 @@ def main_worker(gpu, args):
- 
-     assert args.batch_size % args.world_size == 0
-     per_device_batch_size = args.batch_size // args.world_size
-+    id2bert_dict = dataset.id2bert_dict
-     ###############################
-     loader = torch.utils.data.DataLoader(
-          dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-@@ -267,7 +269,7 @@ def main_worker(gpu, args):
-                 optimizer.step()
-                 # losses += loss.item()
-                 
--                # wandb.log({'iter_loss': loss})
-+#                wandb.log({'iter_loss': loss})
-                 epoch_loss += loss.item()
-                 t += 1 
-                 torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-@@ -293,7 +295,7 @@ def main_worker(gpu, args):
-             if args.rank == 0: 
-                 if epoch%args.checkbleu ==0 : 
- 
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
-+                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                     wandb.log({'bleu_score': bleu_score}) 
-     #            print(bleu_score(predicted, target))
-     ##############################################################
-@@ -311,13 +313,13 @@ def main_worker(gpu, args):
-             
-     else: 
- 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
-+        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-         print('test_bleu_score', bleu_score)
-         if args.rank == 0: 
-             wandb.log({'bleu_score': bleu_score})
- 
- 
--def checkbleu(model, tokenizer, test_loader, gpu): 
-+def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
- 
-     model.eval()
-     predicted=[]
-@@ -325,13 +327,17 @@ def checkbleu(model, tokenizer, test_loader, gpu):
-             
-     for i in test_loader: 
-         src = i[0].cuda(gpu, non_blocking=True)
-+#        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-         tgt_out = i[3].cuda(gpu, non_blocking=True)
-         num_tokens = src.shape[0]
- 
-         src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
-+        out = translate(model, src, tokenizer, src_mask, id2bert_dict, gpu)
-         predicted.append(out)
-+        for i in range(len(tgt_out)): 
-+            tgt_out[i] = id2bert_dict[tgt_out[i].item()]
-         target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-+
-                 
-         try: 
-             bleu_score(predicted, target)
-@@ -375,7 +381,7 @@ def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
- # actual function to translate input sentence into target language
- def translate(model: torch.nn.Module, 
-         src: torch.tensor, 
--        tokenizer,src_mask, gpu):
-+        tokenizer,src_mask, id2bert_dict, gpu):
-     model.eval()
-     
-     num_tokens = src.shape[0]
-@@ -383,6 +389,11 @@ def translate(model: torch.nn.Module,
-     
-     tgt_tokens = greedy_decode(
-         model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-+    
-+    for i in range(len(tgt_tokens)): 
-+        tgt_tokens[i] = id2bert_dict[tgt_tokens[i].item()]
-+#    print(tgt_tokens)
-+
-     return tokenizer.convert_ids_to_tokens(tgt_tokens) 
- 
- 
-diff --git a/translation_dataset.py b/translation_dataset.py
-index 274c2f3..82270c6 100644
---- a/translation_dataset.py
-+++ b/translation_dataset.py
-@@ -11,7 +11,7 @@ class Translation_dataset(Dataset):
-     
-     def __init__(self):
-       
--        self.dataset = load_dataset('wmt14', "de-en", split="train") 
-+        self.dataset = load_dataset('opus_rf', "de-en", split="train") 
-         self.de_list = []
-         self.en_list = []
- 
-diff --git a/translation_utils.py b/translation_utils.py
-index 6c66f53..4b3b830 100644
---- a/translation_utils.py
-+++ b/translation_utils.py
-@@ -31,6 +31,13 @@ def bert2id(de_list: set):
-     
-     return label_dict
- 
-+def id2bert(de_list: set): 
-+    label_dict = {}
-+    for n, i in enumerate(de_list): 
-+        label_dict[n] = i
-+    
-+    return label_dict
-+
- def generate_square_subsequent_mask(sz):
-     mask = (torch.triu(torch.ones((sz, sz))) == 1).transpose(0, 1)
-     mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
-@@ -81,10 +88,10 @@ class TokenEmbedding(nn.Module):
-         super(TokenEmbedding, self).__init__()
-         # self.embedding = nn.Embedding(vocab_size, emb_size)
-         self.embedding = mbert
--#         for param in self.embedding.parameters():
--#             param.requires_grad = False
--#         for param in self.embedding.pooler.parameters():
--#             param.requires_grad = True
-+        for param in self.embedding.parameters():
-+            param.requires_grad = False
-+        for param in self.embedding.pooler.parameters():
-+            param.requires_grad = True
-         self.emb_size = emb_size
- 
-     def forward(self, tokens: torch.tensor):
-diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
-index 6163657..5fd3d32 120000
---- a/wandb/debug-internal.log
-+++ b/wandb/debug-internal.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug-internal.log
-\ No newline at end of file
-+run-20220416_013009-2m8v6ch7/logs/debug-internal.log
-\ No newline at end of file
-diff --git a/wandb/debug.log b/wandb/debug.log
-index 7d0f5dd..95199a3 120000
---- a/wandb/debug.log
-+++ b/wandb/debug.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug.log
-\ No newline at end of file
-+run-20220416_013009-2m8v6ch7/logs/debug.log
-\ No newline at end of file
-diff --git a/wandb/latest-run b/wandb/latest-run
-index f11d588..f412bf7 120000
---- a/wandb/latest-run
-+++ b/wandb/latest-run
-@@ -1 +1 @@
--run-20220409_182749-paufev36
-\ No newline at end of file
-+run-20220416_013009-2m8v6ch7
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py b/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-deleted file mode 100644
-index 9236ace..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-+++ /dev/null
-@@ -1,350 +0,0 @@
--# Copyright (c) Facebook, Inc. and its affiliates.
--# All rights reserved.
--#
--# This source code is licensed under the license found in the
--# LICENSE file in the root directory of this source tree.
--
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--from translation_dataset import Translation_dataset
--from translation_dataset import MyCollate
--from transformers import BertModel
--from transformers import AutoTokenizer
--from torch import nn, optim
--import torch
--from t_dataset import Translation_dataset_t
--from torch.nn import Transformer
--from models import BarlowTwins
--from models import Translator
--from barlow_utils import off_diagonal 
--import wandb 
--#from _config import Config 
--#config = Config.config
--
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--#setting random seeds
--SEED = 4444
--
--random.seed(SEED)
--np.random.seed(SEED)
--torch.manual_seed(SEED)
--torch.cuda.manual_seed(SEED)
--torch.backends.cudnn.deterministic = True
--
--
--
--
--parser = argparse.ArgumentParser(description='Barlow Twins Training')
--# parser.add_batch_sizeargument('data', type=Path, metavar='DIR',
--#                     help='path to dataset')
--
--
--
--# Training parameters: 
--parser.add_argument('--workers', default=20, type=int, metavar='N',
--                    help='number of data loader workers')
--parser.add_argument('--epochs', default=2, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=64, type=int, metavar='N',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate-weights', default=0.2, type=float, metavar='LR',
--                    help='base learning rate for weights')
--parser.add_argument('--learning-rate-biases', default=0.0048, type=float, metavar='LR',
--                 help='base learning rate for biases and batch norm parameters')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--lambd', default=0.0051, type=float, metavar='L',
--                    help='weight on off-diagonal terms')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--
--# Model parameters:
--parser.add_argument('--projector', default='768-768', type=str,
--                    metavar='MLP', help='projector MLP')
--parser.add_argument('--print-freq', default=100, type=int, metavar='N',
--                    help='print frequency')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=3, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--dropout', default=0.0051, type=float, metavar= 'D', 
--                   help='dropout in transformer') 
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-cased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint-dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--parser.add_argument('--load', default=1, type=int,
--                    metavar='LO', help='load weights from translation model')
--
--args = parser.parse_args()
--
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main():
--
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--        wandb.init(config=args)#############################################
--        # wandb.config.update(args)
--        config = wandb.config
--        # print(args.lambd, config.lambd)
--        # wandb.finish()
--        # exibatch_sizet()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=False)
--    t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    mbert = BertModel.from_pretrained(args.tokenizer)
--    model = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=args.lambd).cuda(gpu)
--    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--    optimizer = LARS(parameters, lr=0, weight_decay=args.weight_decay,
--                     weight_decay_filter=True,
--                     lars_adaptation_filter=True)
--    # optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
--
--    # automatically resume from checkpoint if it exists
--    # if (args.checkpoint_dir / 'checkpoint.pth').is_file():
--    #     ckpt = torch.load(args.checkpoint_dir / 'checkpoint.pth',
--    #                       map_location='cpu')
--    #     start_epoch = ckpt['epoch']
--    #     # print("model=",model)
--    #     # print("ckpt=",ckpt['model'])
--    #     model.load_state_dict(ckpt['model'])
--    #     optimizer.load_state_dict(ckpt['optimizer'])
--    # else:
--
--    trans_dataset = Translation_dataset_t(train=True)
--    src_vocab_size = trans_dataset.de_vocab_size 
--    tgt_vocab_size = trans_dataset.en_vocab_size
--    tokenizer = trans_dataset.tokenizer
--    transformer = Transformer(d_model=args.dmodel, 
--                                   nhead=args.nhead, 
--                                   num_encoder_layers=args.nlayers,
--                                   num_decoder_layers=args.nlayers, 
--                                   dim_feedforward=args.dfeedforward, 
--                                   dropout=args.dropout)
--    print(args.batch_size)
--    translation_model = Translator(mbert, 
--            transformer,
--            tgt_vocab_size=tgt_vocab_size,
--            emb_size=args.mbert_out_size)
--    
--    if args.load == 1 : 
--        print('loading translation model')
--        ckpt = torch.load(args.checkpoint_dir / 'translation_checkpoint.pth') #,map_location='cpu')
--        translation_model.load_state_dict(ckpt['model'])
--        model.transformer_enc = translation_model.transformer.encoder
--        model.mbert = translation_model.tok_emb.embedding
--        
--    start_epoch = 0
--
--
--    ################################
--    # dataset = torchvision.datasets.ImageFolder(args.data / 'train', Transform())
--    # sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--    ###############################
--
--    dataset = Translation_dataset()
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    #############################
--    start_time = time.time()
--    scaler = torch.cuda.amp.GradScaler()
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            y1 = sent[0].cuda(gpu, non_blocking=True)
--            y2 = sent[1].cuda(gpu, non_blocking=True)
--            adjust_learning_rate(args, optimizer, loader, step)
--            optimizer.zero_grad()
--            with torch.cuda.amp.autocast(): 
--                _, loss = model.forward(y1, y2)
--                wandb.log({'iter_loss':loss})
--#               print(loss.item())
--                epoch_loss += loss.item()
--            scaler.scale(loss).backward()
--            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
--            scaler.step(optimizer)
--            scaler.update()
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 lr_weights=optimizer.param_groups[0]['lr'],
--                                 lr_biases=optimizer.param_groups[1]['lr'],
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.state_dict(),
--                         optimizer=optimizer.state_dict())
--            torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--            print('barlow model saved in', args.checkpoint_dir)
--            for sent in test_loader: 
--                y1 = sent[0].cuda(gpu, non_blocking=True)
--                y2 = sent[1].cuda(gpu, non_blocking=True)
--                model.eval()
--                c, _ = model(y1, y2)
--                xlabels = tokenizer.convert_ids_to_tokens(y2)
--                ylabels = tokenizer.convert_ids_to_tokens(y1)
--    wandb.finish()
--#    if args.rank == 0:
--#        save final model
--#        torch.save(model.module.state_dict(),
--#                    args.checkpoint_dir / 'translation.pth')
--
--
--def adjust_learning_rate(args, optimizer, loader, step):
--    max_steps = args.epochs * len(loader)
--    warmup_steps = 10 * len(loader)
--    base_lr = args.batch_size / 256
--    if step < warmup_steps:
--        lr = base_lr * step / warmup_steps
--    else:
--        step -= warmup_steps
--        max_steps -= warmup_steps
--        q = 0.5 * (1 + math.cos(math.pi * step / max_steps))
--        end_lr = base_lr * 0.001
--        lr = base_lr * q + end_lr * (1 - q)
--    optimizer.param_groups[0]['lr'] = lr * args.learning_rate_weights
--    optimizer.param_groups[1]['lr'] = lr * args.learning_rate_biases
--
--
--def handle_sigusr1(signum, frame):
--    os.system(f'scontrol requeue {os.getenv("SLURM_JOB_ID")}')
--    exit()
--
--
--def handle_sigterm(signum, frame):
--    pass
--
--
--class LARS(optim.Optimizer):
--    def __init__(self, params, lr, weight_decay=0, momentum=0.9, eta=0.001,
--                 weight_decay_filter=False, lars_adaptation_filter=False):
--        defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum,
--                        eta=eta, weight_decay_filter=weight_decay_filter,
--                        lars_adaptation_filter=lars_adaptation_filter)
--        super().__init__(params, defaults)
--
--
--    def exclude_bias_and_norm(self, p):
--        return p.ndim == 1
--
--    @torch.no_grad()
--    def step(self):
--        for g in self.param_groups:
--            for p in g['params']:
--                dp = p.grad
--
--                if dp is None:
--                    continue
--
--                if not g['weight_decay_filter'] or not self.exclude_bias_and_norm(p):
--                    dp = dp.add(p, alpha=g['weight_decay'])
--
--                if not g['lars_adaptation_filter'] or not self.exclude_bias_and_norm(p):
--                    param_norm = torch.norm(p)
--                    update_norm = torch.norm(dp)
--                    one = torch.ones_like(param_norm)
--                    q = torch.where(param_norm > 0.,
--                                    torch.where(update_norm > 0,
--                                                (g['eta'] * param_norm / update_norm), one), one)
--                    dp = dp.mul(q)
--
--                param_state = self.state[p]
--                if 'mu' not in param_state:
--                    param_state['mu'] = torch.zeros_like(p)
--                mu = param_state['mu']
--                mu.mul_(g['momentum']).add_(dp)
--
--                p.add_(mu, alpha=-g['lr'])
--
--
--if __name__ == '__main__':
--    try:  
--      main()
--    except KeyboardInterrupt:
--      print('Interrupted')
--      wandb.finish()
--      try:
--          sys.exit(0)
--      except SystemExit:
--          os._exit(0)
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml b/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/config.yaml b/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-deleted file mode 100644
-index 147470d..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-+++ /dev/null
-@@ -1,90 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/barlow.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.0051
--epochs:
--  desc: null
--  value: 2
--lambd:
--  desc: null
--  value: 0.0051
--learning_rate_biases:
--  desc: null
--  value: 0.0048
--learning_rate_weights:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 3
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 100
--projector:
--  desc: null
--  value: 768-768
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-cased
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 20
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/output.log b/wandb/run-20220406_171518-s7zesus8/files/output.log
-deleted file mode 100644
-index 847ffbb..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/output.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--
--barlow.py --load 0
--Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Error in sys.excepthook:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 47, in getlines
--    return updatecache(filename, module_globals)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 136, in updatecache
--    with tokenize.open(fullname) as fp:
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/tokenize.py", line 447, in open
--    buffer = _builtin_open(filename, 'rb')
--KeyboardInterrupt
--Original exception was:
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt b/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-deleted file mode 100644
-index 5f93d29..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,21 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-06T11:45:20.215162",
--    "startedAt": "2022-04-06T11:45:18.613420",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_enhancement/barlow.py",
--    "codePath": "barlow.py",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log b/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-deleted file mode 100644
-index 0630656..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-+++ /dev/null
-@@ -1,91 +0,0 @@
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,622 DEBUG   MainThread:16786 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: check_version
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send():179] send: header
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: check_version
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:19,155 DEBUG   SenderThread:16786 [sender.py:send():179] send: run
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 DEBUG   SenderThread:16786 [sender.py:send():179] send: summary
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:20,211 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: run_start
--2022-04-06 17:15:20,214 DEBUG   HandlerThread:16786 [meta.py:__init__():39] meta init
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:__init__():53] meta init done
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:probe():210] probe
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():89] save code
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():110] save code done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():57] save pip
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():71] save pip done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_conda():78] save conda
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,240 DEBUG   HandlerThread:16786 [meta.py:_save_conda():86] save conda done
--2022-04-06 17:15:22,241 DEBUG   HandlerThread:16786 [meta.py:probe():252] probe done
--2022-04-06 17:15:22,255 DEBUG   SenderThread:16786 [sender.py:send():179] send: files
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-06 17:15:22,262 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: stop_status
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug.log b/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-deleted file mode 100644
-index 9769176..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:init():369] calling init triggers
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 20, 'epochs': 2, 'batch_size': 64, 'learning_rate_weights': 0.2, 'learning_rate_biases': 0.0048, 'weight_decay': 1e-06, 'lambd': 0.0051, 'clip': 1, 'projector': '768-768', 'print_freq': 100, 'dmodel': 768, 'nhead': 3, 'dfeedforward': 256, 'nlayers': 3, 'dropout': 0.0051, 'tokenizer': 'bert-base-multilingual-cased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():418] starting backend
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():132] starting backend process...
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb b/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
-deleted file mode 100644
-index cd7ebea..0000000
-Binary files a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb and /dev/null differ
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py b/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-deleted file mode 100644
-index f15df21..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch b/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-deleted file mode 100644
-index 0ddeae0..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-+++ /dev/null
-@@ -1,226 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2158287 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,87 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..ee4c0ff 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..29be718 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..bda663d 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/output.log b/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-deleted file mode 100644
-index 4d74c7d..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt b/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-deleted file mode 100644
-index 9eb0f02..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:28:48.101605",
--    "startedAt": "2022-04-08T09:28:45.736549",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-deleted file mode 100644
-index 5708b15..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.139744758605957, "_runtime": 22, "_timestamp": 1649410147, "_step": 1, "epoch_loss": 7.139744758605957}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-deleted file mode 100644
-index e57e276..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,745 DEBUG   MainThread:63630 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send():179] send: header
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:46,531 DEBUG   SenderThread:63630 [sender.py:send():179] send: run
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:48,099 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():39] meta init
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():53] meta init done
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:probe():210] probe
--2022-04-08 14:58:48,107 DEBUG   HandlerThread:63630 [meta.py:_setup_git():200] setup git
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_save_code():89] save code
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_code():110] save code done
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_patches():127] save patches
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():57] save pip
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_conda():78] save conda
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:probe():252] probe done
--2022-04-08 14:58:49,727 DEBUG   SenderThread:63630 [sender.py:send():179] send: files
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,737 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:50,547 DEBUG   SenderThread:63630 [sender.py:send():179] send: config
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:05,549 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:05,549 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-deleted file mode 100644
-index a6875c4..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'd3rkwo1k', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml', 'start_method': 'thread'}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:45,738 INFO    MainThread:63630 [wandb_init.py:init():418] starting backend
--2022-04-08 14:58:45,743 INFO    MainThread:63630 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb b/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py b/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml b/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/config.yaml b/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-deleted file mode 100644
-index d5b49b7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 36
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/diff.patch b/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-deleted file mode 100644
-index 5bddede..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-+++ /dev/null
-@@ -1,228 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..f7a973d 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,89 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..151b958 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..80b3468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..abf5aa3 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145917-fjhaj183
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/output.log b/wandb/run-20220408_145917-fjhaj183/files/output.log
-deleted file mode 100644
-index ceeeb4b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt b/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-deleted file mode 100644
-index 705a1e7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:18.659644",
--    "startedAt": "2022-04-08T09:29:17.328450",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=36",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-deleted file mode 100644
-index 1749cae..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140841484069824, "_runtime": 16, "_timestamp": 1649410173, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log b/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-deleted file mode 100644
-index 6a2ea0b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,338 DEBUG   MainThread:63880 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send():179] send: header
--2022-04-08 14:59:17,342 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:17,943 DEBUG   SenderThread:63880 [sender.py:send():179] send: run
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:18,657 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():39] meta init
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:probe():210] probe
--2022-04-08 14:59:18,665 DEBUG   HandlerThread:63880 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_save_code():89] save code
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:probe():252] probe done
--2022-04-08 14:59:20,075 DEBUG   SenderThread:63880 [sender.py:send():179] send: files
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,086 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:20,978 DEBUG   SenderThread:63880 [sender.py:send():179] send: config
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: history
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug.log b/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-deleted file mode 100644
-index 5f71fa1..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjhaj183', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjhaj183.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 36, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:17,335 INFO    MainThread:63880 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb b/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py b/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml b/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml b/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-deleted file mode 100644
-index 39ea9ed..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 16
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch b/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-deleted file mode 100644
-index 3de404c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-+++ /dev/null
-@@ -1,230 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..1036f20 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,91 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..33a9122 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..622b540 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c775116 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/output.log b/wandb/run-20220408_145943-fjlzyv53/files/output.log
-deleted file mode 100644
-index 0a584f7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt b/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-deleted file mode 100644
-index 321b5fe..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:44.714511",
--    "startedAt": "2022-04-08T09:29:43.530748",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=16",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-deleted file mode 100644
-index 43fa534..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.180241584777832, "_runtime": 16, "_timestamp": 1649410199, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-deleted file mode 100644
-index 1bb5ef6..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,540 DEBUG   MainThread:64131 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send():179] send: header
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:43,999 DEBUG   SenderThread:64131 [sender.py:send():179] send: run
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:44,712 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():39] meta init
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:probe():210] probe
--2022-04-08 14:59:44,720 DEBUG   HandlerThread:64131 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:44,739 DEBUG   HandlerThread:64131 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:44,740 DEBUG   HandlerThread:64131 [meta.py:_save_code():89] save code
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:probe():252] probe done
--2022-04-08 14:59:46,122 DEBUG   SenderThread:64131 [sender.py:send():179] send: files
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,133 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,710 DEBUG   SenderThread:64131 [sender.py:send():179] send: config
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: history
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-deleted file mode 100644
-index 042323c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjlzyv53', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 1024, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:43,537 INFO    MainThread:64131 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb b/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py b/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml b/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150006-abict4v2/files/config.yaml b/wandb/run-20220408_150006-abict4v2/files/config.yaml
-deleted file mode 100644
-index 55505a9..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 20
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 8
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150006-abict4v2/files/diff.patch b/wandb/run-20220408_150006-abict4v2/files/diff.patch
-deleted file mode 100644
-index cae01c4..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/diff.patch
-+++ /dev/null
-@@ -1,232 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..a79a795 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,93 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..baa82b6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..79d1f8d 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..4572147 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150006-abict4v2
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/output.log b/wandb/run-20220408_150006-abict4v2/files/output.log
-deleted file mode 100644
-index 18438a2..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/output.log
-+++ /dev/null
-@@ -1,14 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:261: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
-diff --git a/wandb/run-20220408_150006-abict4v2/files/requirements.txt b/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json b/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-deleted file mode 100644
-index f46fef8..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:08.569102",
--    "startedAt": "2022-04-08T09:30:06.988517",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=20",
--        "--nhead=8",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json b/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-deleted file mode 100644
-index 4c47552..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.120020389556885, "_runtime": 21, "_timestamp": 1649410227, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log b/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-deleted file mode 100644
-index eb4114e..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-+++ /dev/null
-@@ -1,71 +0,0 @@
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,998 DEBUG   MainThread:64393 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send():179] send: header
--2022-04-08 15:00:07,002 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:07,447 DEBUG   SenderThread:64393 [sender.py:send():179] send: run
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,565 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:08,566 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:08,568 DEBUG   HandlerThread:64393 [meta.py:__init__():39] meta init
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:probe():210] probe
--2022-04-08 15:00:08,574 DEBUG   HandlerThread:64393 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_save_code():89] save code
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:probe():252] probe done
--2022-04-08 15:00:10,005 DEBUG   SenderThread:64393 [sender.py:send():179] send: files
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:11,189 DEBUG   SenderThread:64393 [sender.py:send():179] send: config
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:26,191 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:26,191 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: history
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug.log b/wandb/run-20220408_150006-abict4v2/logs/debug.log
-deleted file mode 100644
-index 2782e5f..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug.log
-+++ /dev/null
-@@ -1,51 +0,0 @@
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'abict4v2', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-abict4v2.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 20, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 8, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:06,990 INFO    MainThread:64393 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:06,995 INFO    MainThread:64393 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb b/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py b/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml b/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml b/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-deleted file mode 100644
-index ea14f0e..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch b/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-deleted file mode 100644
-index 47b804f..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-+++ /dev/null
-@@ -1,234 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2248477 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,95 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..165ed2c 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..f1325dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..1413293 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/output.log b/wandb/run-20220408_150037-ba0yl54z/files/output.log
-deleted file mode 100644
-index 6742216..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt b/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-deleted file mode 100644
-index 5a492ae..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:38.254663",
--    "startedAt": "2022-04-08T09:30:37.394479",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=64",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-deleted file mode 100644
-index 662ac89..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.082856178283691, "_runtime": 16, "_timestamp": 1649410253, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-deleted file mode 100644
-index 0c041a1..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,404 DEBUG   MainThread:64646 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 DEBUG   SenderThread:64646 [sender.py:send():179] send: header
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,410 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:37,410 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:37,611 DEBUG   SenderThread:64646 [sender.py:send():179] send: run
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:38,252 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():39] meta init
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:probe():210] probe
--2022-04-08 15:00:38,260 DEBUG   HandlerThread:64646 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_save_code():89] save code
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:probe():252] probe done
--2022-04-08 15:00:39,665 DEBUG   SenderThread:64646 [sender.py:send():179] send: files
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,676 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:39,676 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:40,430 DEBUG   SenderThread:64646 [sender.py:send():179] send: config
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: history
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-deleted file mode 100644
-index 4346748..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'ba0yl54z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 64, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 512, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:37,401 INFO    MainThread:64646 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb b/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py b/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml b/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml b/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-deleted file mode 100644
-index 546bdaa..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 16
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch b/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-deleted file mode 100644
-index c98ba4e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-+++ /dev/null
-@@ -1,285 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ea51a40 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,97 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f8e98b2 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..9304e2b 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b02872b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/output.log b/wandb/run-20220408_153004-dg43ixc4/files/output.log
-deleted file mode 100644
-index f49019d..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt b/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-deleted file mode 100644
-index 109e1b6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:05.796412",
--    "startedAt": "2022-04-08T10:00:04.837672",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=512",
--        "--epochs=16",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-deleted file mode 100644
-index 09cdda6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140233993530273, "_runtime": 15, "_timestamp": 1649412019, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-deleted file mode 100644
-index 9669aaf..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,848 DEBUG   MainThread:65348 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,851 DEBUG   SenderThread:65348 [sender.py:send():179] send: header
--2022-04-08 15:30:04,851 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:04,852 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,022 DEBUG   SenderThread:65348 [sender.py:send():179] send: run
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:05,794 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():39] meta init
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:probe():210] probe
--2022-04-08 15:30:05,802 DEBUG   HandlerThread:65348 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:05,821 DEBUG   HandlerThread:65348 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:05,822 DEBUG   HandlerThread:65348 [meta.py:_save_code():89] save code
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:probe():252] probe done
--2022-04-08 15:30:07,221 DEBUG   SenderThread:65348 [sender.py:send():179] send: files
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,232 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:07,233 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,677 DEBUG   SenderThread:65348 [sender.py:send():179] send: config
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: history
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-deleted file mode 100644
-index 66c14b1..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'dg43ixc4', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 16, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:04,845 INFO    MainThread:65348 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb b/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py b/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml b/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml b/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-deleted file mode 100644
-index 122f33a..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch b/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-deleted file mode 100644
-index 797f0a1..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-+++ /dev/null
-@@ -1,287 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..356076f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,99 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7b452fc 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..48b2ecd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..93be230 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/output.log b/wandb/run-20220408_153027-fwwd5rya/files/output.log
-deleted file mode 100644
-index e86aeca..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-17:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt b/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-deleted file mode 100644
-index dcac75d..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:27.794832",
--    "startedAt": "2022-04-08T10:00:27.031889",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=256",
--        "--epochs=40",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-deleted file mode 100644
-index e70a2b8..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-+++ /dev/null
-@@ -1,99 +0,0 @@
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,040 DEBUG   MainThread:65601 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,046 DEBUG   SenderThread:65601 [sender.py:send():179] send: header
--2022-04-08 15:30:27,046 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:27,047 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,254 DEBUG   SenderThread:65601 [sender.py:send():179] send: run
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: summary
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:27,792 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():39] meta init
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:probe():210] probe
--2022-04-08 15:30:27,800 DEBUG   HandlerThread:65601 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:27,819 DEBUG   HandlerThread:65601 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:27,820 DEBUG   HandlerThread:65601 [meta.py:_save_code():89] save code
--2022-04-08 15:30:27,828 DEBUG   HandlerThread:65601 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:27,829 DEBUG   HandlerThread:65601 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:probe():252] probe done
--2022-04-08 15:30:29,202 DEBUG   SenderThread:65601 [sender.py:send():179] send: files
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:29,214 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: config
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-deleted file mode 100644
-index 987c5d6..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-+++ /dev/null
-@@ -1,84 +0,0 @@
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fwwd5rya', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 256, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:27,038 INFO    MainThread:65601 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:27,039 INFO    MainThread:65601 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb b/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
-deleted file mode 100644
-index bfb12ff..0000000
-Binary files a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py b/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml b/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml b/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch b/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-deleted file mode 100644
-index bd71761..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-+++ /dev/null
-@@ -1,377 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..d3a775c 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,100 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..74ec524 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..c957937 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..287708f 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/output.log b/wandb/run-20220409_152616-3a3gw94y/files/output.log
-deleted file mode 100644
-index 13e9c3e..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt b/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-deleted file mode 100644
-index 20f0482..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:56:17.429229",
--    "startedAt": "2022-04-09T09:56:16.815816",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-deleted file mode 100644
-index 5602f92..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 16, "_timestamp": 1649498192, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-deleted file mode 100644
-index 2546fd3..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,824 DEBUG   MainThread:3266 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,828 DEBUG   SenderThread:3266 [sender.py:send():179] send: header
--2022-04-09 15:26:16,829 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:16,984 DEBUG   SenderThread:3266 [sender.py:send():179] send: run
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:17,426 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():39] meta init
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():53] meta init done
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:probe():210] probe
--2022-04-09 15:26:17,435 DEBUG   HandlerThread:3266 [meta.py:_setup_git():200] setup git
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_save_code():89] save code
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_code():110] save code done
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_patches():127] save patches
--2022-04-09 15:26:17,564 DEBUG   HandlerThread:3266 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:26:17,565 DEBUG   HandlerThread:3266 [meta.py:_save_pip():57] save pip
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_conda():78] save conda
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:probe():252] probe done
--2022-04-09 15:26:19,491 DEBUG   SenderThread:3266 [sender.py:send():179] send: files
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:26:19,497 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:19,831 DEBUG   SenderThread:3266 [sender.py:send():179] send: config
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: history
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-deleted file mode 100644
-index ebbf034..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():418] starting backend
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb b/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py b/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml b/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml b/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch b/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-deleted file mode 100644
-index c3ed101..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-+++ /dev/null
-@@ -1,379 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ed88fe4 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,102 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..4895794 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..1f9d48c 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..dfe2dcb 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/output.log b/wandb/run-20220409_152708-15jgzcwp/files/output.log
-deleted file mode 100644
-index 9a9a49f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt b/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-deleted file mode 100644
-index abaad7d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:57:09.613679",
--    "startedAt": "2022-04-09T09:57:08.966939",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-deleted file mode 100644
-index 0164a0d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 12, "_timestamp": 1649498241, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-deleted file mode 100644
-index de7918e..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,002 DEBUG   MainThread:3540 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,017 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send():179] send: header
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,109 DEBUG   SenderThread:3540 [sender.py:send():179] send: run
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:09,611 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():39] meta init
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():53] meta init done
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:probe():210] probe
--2022-04-09 15:27:09,619 DEBUG   HandlerThread:3540 [meta.py:_setup_git():200] setup git
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_save_code():89] save code
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_code():110] save code done
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_patches():127] save patches
--2022-04-09 15:27:09,693 DEBUG   HandlerThread:3540 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():57] save pip
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_conda():78] save conda
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,002 DEBUG   HandlerThread:3540 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:27:11,003 DEBUG   HandlerThread:3540 [meta.py:probe():252] probe done
--2022-04-09 15:27:11,004 DEBUG   SenderThread:3540 [sender.py:send():179] send: files
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,362 DEBUG   SenderThread:3540 [sender.py:send():179] send: config
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: history
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-deleted file mode 100644
-index 023162f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:27:08,971 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:08,974 INFO    MainThread:3540 [wandb_init.py:init():418] starting backend
--2022-04-09 15:27:08,994 INFO    MainThread:3540 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:27:08,996 INFO    MainThread:3540 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb b/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py b/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-deleted file mode 100644
-index 596bd8d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch b/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-deleted file mode 100644
-index edba74d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-+++ /dev/null
-@@ -1,457 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..6f7f3e6 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,180 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..596bd8d 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7064436 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..3ee4416 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..425ec98 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/output.log b/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-deleted file mode 100644
-index e872735..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt b/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-deleted file mode 100644
-index 39bdbe7..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:31:16.739157",
--    "startedAt": "2022-04-09T10:31:15.626079",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-deleted file mode 100644
-index 96a4906..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 14, "_timestamp": 1649500289, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-deleted file mode 100644
-index 2dc7db1..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,660 DEBUG   MainThread:6109 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 DEBUG   SenderThread:6109 [sender.py:send():179] send: header
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,673 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:01:15,673 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:15,970 DEBUG   SenderThread:6109 [sender.py:send():179] send: run
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:16,736 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():39] meta init
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():53] meta init done
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:probe():210] probe
--2022-04-09 16:01:16,745 DEBUG   HandlerThread:6109 [meta.py:_setup_git():200] setup git
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_save_code():89] save code
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_code():110] save code done
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_patches():127] save patches
--2022-04-09 16:01:16,811 DEBUG   HandlerThread:6109 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():57] save pip
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_conda():78] save conda
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:probe():252] probe done
--2022-04-09 16:01:18,150 DEBUG   SenderThread:6109 [sender.py:send():179] send: files
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,158 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:01:18,158 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,709 DEBUG   SenderThread:6109 [sender.py:send():179] send: config
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: history
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-deleted file mode 100644
-index 87f5666..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--2022-04-09 16:01:15,633 INFO    MainThread:6109 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():418] starting backend
--2022-04-09 16:01:15,655 INFO    MainThread:6109 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:01:15,656 INFO    MainThread:6109 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb b/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py b/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-deleted file mode 100644
-index feaf1fc..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch b/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-deleted file mode 100644
-index eec0ab3..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-+++ /dev/null
-@@ -1,459 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..8b42533 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,182 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..feaf1fc 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..e712296 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b2fc627 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..337b531 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/output.log b/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-deleted file mode 100644
-index e15e9a4..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-+++ /dev/null
-@@ -1,17 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt b/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-deleted file mode 100644
-index f4efc7b..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:32:47.190940",
--    "startedAt": "2022-04-09T10:32:46.030719",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-deleted file mode 100644
-index 59ceedf..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 18, "_timestamp": 1649500384, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-deleted file mode 100644
-index 4dae842..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,040 DEBUG   MainThread:6410 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send():179] send: header
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:46,151 DEBUG   SenderThread:6410 [sender.py:send():179] send: run
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:47,188 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():39] meta init
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():53] meta init done
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:probe():210] probe
--2022-04-09 16:02:47,197 DEBUG   HandlerThread:6410 [meta.py:_setup_git():200] setup git
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_save_code():89] save code
--2022-04-09 16:02:47,224 DEBUG   HandlerThread:6410 [meta.py:_save_code():110] save code done
--2022-04-09 16:02:47,225 DEBUG   HandlerThread:6410 [meta.py:_save_patches():127] save patches
--2022-04-09 16:02:47,270 DEBUG   HandlerThread:6410 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():57] save pip
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_conda():78] save conda
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:probe():252] probe done
--2022-04-09 16:02:48,639 DEBUG   SenderThread:6410 [sender.py:send():179] send: files
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,649 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:49,267 DEBUG   SenderThread:6410 [sender.py:send():179] send: config
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,268 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:03:04,269 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:03:04,791 DEBUG   SenderThread:6410 [sender.py:send():179] send: history
--2022-04-09 16:03:04,792 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-deleted file mode 100644
-index c4edd31..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():418] starting backend
--2022-04-09 16:02:46,037 INFO    MainThread:6410 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb b/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py b/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-deleted file mode 100644
-index 182fd97..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-+++ /dev/null
-@@ -1,378 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch b/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-deleted file mode 100644
-index 2c51f6a..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-+++ /dev/null
-@@ -1,470 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..507a499 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,192 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..182fd97 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,98 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..2224b92 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..94d02b9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f7361e5 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/output.log b/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-deleted file mode 100644
-index 35bceac..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-+++ /dev/null
-@@ -1,18 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt b/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-deleted file mode 100644
-index 440569b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:34:10.122598",
--    "startedAt": "2022-04-09T10:34:09.149412",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-deleted file mode 100644
-index 52da06b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 27, "_timestamp": 1649500476, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-deleted file mode 100644
-index bf89eff..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,159 DEBUG   MainThread:6703 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send():179] send: header
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:09,250 DEBUG   SenderThread:6703 [sender.py:send():179] send: run
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:10,119 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():39] meta init
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():53] meta init done
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:probe():210] probe
--2022-04-09 16:04:10,130 DEBUG   HandlerThread:6703 [meta.py:_setup_git():200] setup git
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_save_code():89] save code
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_code():110] save code done
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_patches():127] save patches
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_pip():57] save pip
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_conda():78] save conda
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:probe():252] probe done
--2022-04-09 16:04:11,658 DEBUG   SenderThread:6703 [sender.py:send():179] send: files
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,667 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:11,669 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:12,396 DEBUG   SenderThread:6703 [sender.py:send():179] send: config
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:27,397 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:27,397 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: history
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:39,168 DEBUG   SenderThread:6703 [sender.py:send():179] send: stats
--2022-04-09 16:04:44,241 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:44,241 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:59,736 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:59,737 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-deleted file mode 100644
-index 0fbab81..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-+++ /dev/null
-@@ -1,54 +0,0 @@
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():418] starting backend
--2022-04-09 16:04:09,156 INFO    MainThread:6703 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:04:09,157 INFO    MainThread:6703 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb b/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
-deleted file mode 100644
-index 81c67b9..0000000
-Binary files a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py b/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml b/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/config.yaml b/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-deleted file mode 100644
-index 1ebd7db..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/diff.patch b/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-deleted file mode 100644
-index 9c4e2ae..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-+++ /dev/null
-@@ -1,482 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2d0dffc 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,202 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..18dd535 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b8703a2 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7af087b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160908-2097uoqw
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/output.log b/wandb/run-20220409_160908-2097uoqw/files/output.log
-deleted file mode 100644
-index ed7c7b5..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt b/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-deleted file mode 100644
-index 3cf53b0..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:39:09.049034",
--    "startedAt": "2022-04-09T10:39:08.174640",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-deleted file mode 100644
-index 225791e..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5264.9873046875, "_runtime": 162, "_timestamp": 1649500910, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log b/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-deleted file mode 100644
-index 1baf812..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-+++ /dev/null
-@@ -1,1238 +0,0 @@
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,183 DEBUG   MainThread:7244 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 DEBUG   SenderThread:7244 [sender.py:send():179] send: header
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,187 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:09:08,187 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:08,556 DEBUG   SenderThread:7244 [sender.py:send():179] send: run
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:09,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():39] meta init
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():53] meta init done
--2022-04-09 16:09:09,049 DEBUG   HandlerThread:7244 [meta.py:probe():210] probe
--2022-04-09 16:09:09,055 DEBUG   HandlerThread:7244 [meta.py:_setup_git():200] setup git
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_save_code():89] save code
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_code():110] save code done
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_patches():127] save patches
--2022-04-09 16:09:09,148 DEBUG   HandlerThread:7244 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:09:09,149 DEBUG   HandlerThread:7244 [meta.py:_save_pip():57] save pip
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_conda():78] save conda
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:probe():252] probe done
--2022-04-09 16:09:10,559 DEBUG   SenderThread:7244 [sender.py:send():179] send: files
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,033 DEBUG   SenderThread:7244 [sender.py:send():179] send: config
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:26,037 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:26,037 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:37,780 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:41,491 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:41,492 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:08,466 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:10:12,367 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:12,368 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:27,818 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:27,818 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:43,478 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:43,478 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,373 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:05,374 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:08,654 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:14,750 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:14,750 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:32,169 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:32,169 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:39,457 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:48,462 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:48,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:03,967 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:12:03,968 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
--2022-04-09 16:12:05,938 INFO    MainThread:7244 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 16:12:05,939 INFO    MainThread:7244 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:12:06,150 DEBUG   SenderThread:7244 [sender.py:send():179] send: telemetry
--2022-04-09 16:12:06,151 DEBUG   SenderThread:7244 [sender.py:send():179] send: exit
--2022-04-09 16:12:06,151 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():295] send defer
--2022-04-09 16:12:06,153 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:06,155 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,155 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 16:12:06,155 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:06,156 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 16:12:06,158 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,158 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 16:12:06,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:12:06,227 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,227 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 16:12:06,228 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,228 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 16:12:06,229 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,229 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 16:12:06,229 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,229 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 16:12:06,259 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,450 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:06,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:07,230 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 16:12:07,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,231 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,231 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 16:12:07,231 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:07,232 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:12:07,333 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:07,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:07,453 INFO    SenderThread:7244 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt requirements.txt
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:12:07,455 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log output.log
--2022-04-09 16:12:07,456 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:12:07,457 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:12:07,467 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml config.yaml
--2022-04-09 16:12:07,468 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch diff.patch
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 16:12:07,508 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,510 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,510 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 16:12:07,510 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:07,511 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 16:12:07,512 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,512 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 16:12:07,512 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,513 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 16:12:07,612 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,484 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 16:12:08,485 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,486 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,486 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 16:12:08,487 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 16:12:08,487 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41552
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,489 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,489 DEBUG   SenderThread:7244 [sender.py:send():179] send: final
--2022-04-09 16:12:08,490 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send():179] send: footer
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,490 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 16:12:08,591 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,591 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,593 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,695 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,695 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,696 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,798 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,798 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,799 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,848 INFO    Thread-33 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:08,900 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,901 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,902 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,004 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,005 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,006 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,108 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,109 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,110 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,212 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,213 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,214 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,316 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,317 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,318 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,420 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,421 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,422 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,524 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,525 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,526 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,628 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,629 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,630 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,732 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,733 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,734 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,837 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,838 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,840 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,875 INFO    Thread-32 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:09,942 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,942 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,944 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,046 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,047 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,149 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,150 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,151 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,253 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,254 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,255 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,304 INFO    Thread-29 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:12:10,357 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,358 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,359 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,461 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,463 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,772 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,772 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,772 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,874 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,874 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,876 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,978 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,979 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,980 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,082 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,082 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,084 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,186 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,186 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,188 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,290 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,290 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,292 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,314 INFO    Thread-30 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:11,394 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,394 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,396 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,498 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,499 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,500 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,602 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,603 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,604 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,706 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,707 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,708 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,810 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,810 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,812 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,914 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,915 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,916 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,018 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,019 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,020 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,122 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,122 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,124 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,226 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,228 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,330 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,330 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,332 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,434 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,435 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,436 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,538 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,538 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,540 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,642 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,642 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,644 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,746 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,746 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,747 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,850 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,850 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,852 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,954 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,954 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,955 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,057 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,058 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,059 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,161 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,162 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,163 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,265 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,266 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,267 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,369 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,370 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,371 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,473 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,473 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,475 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,577 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,577 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,578 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,680 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,681 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,682 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,784 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,785 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,786 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,888 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,889 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,890 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,992 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,993 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,994 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,096 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,097 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,098 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,200 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,201 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,202 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,304 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,305 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,307 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,409 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,410 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,411 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,513 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,514 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,515 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,617 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,618 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,619 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,721 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,721 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,723 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,826 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,827 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,829 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,931 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,931 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,933 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,034 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,035 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,037 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,138 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,139 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,141 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,244 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,244 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,245 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,348 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,348 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,350 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,453 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,454 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,461 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,773 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,773 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,775 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,877 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,877 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,879 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,981 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,982 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,983 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,085 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,086 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,087 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,189 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,190 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,191 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,293 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,294 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,295 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,397 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,398 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,399 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,501 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,502 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,503 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,605 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,606 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,607 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,709 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,710 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,711 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,813 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,814 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,816 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,918 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,919 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,920 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,022 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,023 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,024 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,126 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,127 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,128 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,230 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,232 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,334 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,335 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,336 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,374 INFO    Thread-31 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:12:17,438 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,438 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,440 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,542 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,543 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,544 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,646 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,647 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,647 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:17,648 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,650 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 16:12:17,653 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 16:12:17,656 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 16:12:17,656 INFO    HandlerThread:7244 [handler.py:finish():638] shutting down handler
--2022-04-09 16:12:18,493 INFO    WriterThread:7244 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:12:18,647 INFO    SenderThread:7244 [sender.py:finish():933] shutting down sender
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:18,661 INFO    MainThread:7244 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 16:12:18,662 INFO    MainThread:7244 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 16:12:18,663 INFO    MainThread:7244 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 16:12:18,709 INFO    MainThread:7244 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug.log b/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-deleted file mode 100644
-index ad8f755..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-+++ /dev/null
-@@ -1,77 +0,0 @@
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug.log
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():418] starting backend
--2022-04-09 16:09:08,180 INFO    MainThread:7244 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
-diff --git a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb b/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
-deleted file mode 100644
-index b5995f1..0000000
-Binary files a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py b/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml b/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/config.yaml b/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/diff.patch b/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-deleted file mode 100644
-index aa6c773..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-+++ /dev/null
-@@ -1,528 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2aaecf9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,248 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..91bb884 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..252e468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c99b343 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_161421-3t82t88x
--\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/output.log b/wandb/run-20220409_161421-3t82t88x/files/output.log
-deleted file mode 100644
-index 3bf650b..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/output.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt b/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-deleted file mode 100644
-index f9df6f1..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:44:23.094487",
--    "startedAt": "2022-04-09T10:44:21.821617",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log b/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-deleted file mode 100644
-index 3f70132..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,831 DEBUG   MainThread:8815 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send():179] send: header
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:21,939 DEBUG   SenderThread:8815 [sender.py:send():179] send: run
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,090 DEBUG   SenderThread:8815 [sender.py:send():179] send: summary
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:23,092 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():39] meta init
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():53] meta init done
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:probe():210] probe
--2022-04-09 16:14:23,100 DEBUG   HandlerThread:8815 [meta.py:_setup_git():200] setup git
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_save_code():89] save code
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_code():110] save code done
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_patches():127] save patches
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_pip():57] save pip
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_conda():78] save conda
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,537 DEBUG   HandlerThread:8815 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:14:24,538 DEBUG   HandlerThread:8815 [meta.py:probe():252] probe done
--2022-04-09 16:14:24,539 DEBUG   SenderThread:8815 [sender.py:send():179] send: files
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,548 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:24,548 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:25,577 DEBUG   SenderThread:8815 [sender.py:send():179] send: config
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:40,579 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:40,579 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:51,743 DEBUG   SenderThread:8815 [sender.py:send():179] send: stats
--2022-04-09 16:14:56,424 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:56,424 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:15:01,820 DEBUG   SenderThread:8815 [sender.py:send():179] send: history
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug.log b/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-deleted file mode 100644
-index 99b6b97..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug.log
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():418] starting backend
--2022-04-09 16:14:21,828 INFO    MainThread:8815 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb b/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
-deleted file mode 100644
-index a4486ce..0000000
-Binary files a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py b/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml b/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/config.yaml b/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/diff.patch b/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-deleted file mode 100644
-index 9eddab1..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-+++ /dev/null
-@@ -1,560 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..353da1f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,249 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f0332eb 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..97853e9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7be71e2 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_162621-m83puhmm
--\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/output.log b/wandb/run-20220409_162621-m83puhmm/files/output.log
-deleted file mode 100644
-index ee1c9e3..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/output.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt b/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-deleted file mode 100644
-index 4ce8f76..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:56:22.902051",
--    "startedAt": "2022-04-09T10:56:21.924771",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log b/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-deleted file mode 100644
-index 7032449..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,934 DEBUG   MainThread:9280 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:26:21,937 DEBUG   SenderThread:9280 [sender.py:send():179] send: header
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:21,938 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,344 DEBUG   SenderThread:9280 [sender.py:send():179] send: run
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,889 DEBUG   SenderThread:9280 [sender.py:send():179] send: summary
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:22,895 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():39] meta init
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():53] meta init done
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:probe():210] probe
--2022-04-09 16:26:22,908 DEBUG   HandlerThread:9280 [meta.py:_setup_git():200] setup git
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_save_code():89] save code
--2022-04-09 16:26:22,972 DEBUG   HandlerThread:9280 [meta.py:_save_code():110] save code done
--2022-04-09 16:26:22,973 DEBUG   HandlerThread:9280 [meta.py:_save_patches():127] save patches
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():57] save pip
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_conda():78] save conda
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:probe():252] probe done
--2022-04-09 16:26:24,440 DEBUG   SenderThread:9280 [sender.py:send():179] send: files
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:24,448 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:24,898 DEBUG   SenderThread:9280 [sender.py:send():179] send: config
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:39,905 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:39,905 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:51,624 DEBUG   SenderThread:9280 [sender.py:send():179] send: stats
--2022-04-09 16:26:55,340 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:55,340 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:27:06,912 DEBUG   SenderThread:9280 [sender.py:send():179] send: history
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug.log b/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-deleted file mode 100644
-index 5053427..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():418] starting backend
--2022-04-09 16:26:21,931 INFO    MainThread:9280 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb b/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
-deleted file mode 100644
-index 978cbe5..0000000
-Binary files a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py b/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-deleted file mode 100644
-index 1988ff1..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 1
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 1
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch b/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-deleted file mode 100644
-index d503875..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-+++ /dev/null
-@@ -1,561 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..b0966e9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,250 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..1486dd6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..071678f 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..be8b91a 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf
--\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/output.log b/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-deleted file mode 100644
-index f4f17d5..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt b/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-deleted file mode 100644
-index 6c00633..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:09:01.944494",
--    "startedAt": "2022-04-09T12:09:01.199712",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-deleted file mode 100644
-index c0804b4..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5045.823547363281, "_runtime": 154, "_timestamp": 1649506295, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-deleted file mode 100644
-index 67f5897..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-+++ /dev/null
-@@ -1,418 +0,0 @@
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,208 DEBUG   MainThread:10760 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send():179] send: header
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,337 DEBUG   SenderThread:10760 [sender.py:send():179] send: run
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:01,942 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():39] meta init
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():53] meta init done
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:probe():210] probe
--2022-04-09 17:39:01,950 DEBUG   HandlerThread:10760 [meta.py:_setup_git():200] setup git
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_save_code():89] save code
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_code():110] save code done
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_patches():127] save patches
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():57] save pip
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_conda():78] save conda
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:probe():252] probe done
--2022-04-09 17:39:03,362 DEBUG   SenderThread:10760 [sender.py:send():179] send: files
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,372 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:03,372 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,822 DEBUG   SenderThread:10760 [sender.py:send():179] send: config
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:18,825 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:18,826 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:30,755 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:39:34,298 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:34,298 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:01,384 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:05,203 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:05,204 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,724 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:20,725 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,136 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:27,137 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:32,273 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:36,248 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:36,249 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:51,681 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:51,682 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:02,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,142 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:07,142 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:22,870 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:22,871 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:33,728 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,321 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:38,322 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: telemetry
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: exit
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():295] send defer
--2022-04-09 17:41:51,004 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,005 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,006 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,006 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 17:41:51,007 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,008 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,008 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 17:41:51,009 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 17:41:51,009 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,010 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 17:41:51,062 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,062 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:51,063 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,063 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 17:41:51,064 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,064 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 17:41:51,064 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 17:41:51,065 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,065 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 17:41:51,109 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,203 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:51,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:51,546 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 17:41:51,546 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,546 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,546 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,546 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 17:41:51,547 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 17:41:51,648 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt requirements.txt
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log output.log
--2022-04-09 17:41:52,208 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 17:41:52,209 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json wandb-summary.json
--2022-04-09 17:41:52,218 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml config.yaml
--2022-04-09 17:41:52,220 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch diff.patch
--2022-04-09 17:41:52,222 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py code/train_translation.py
--2022-04-09 17:41:52,224 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 17:41:52,224 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 17:41:52,225 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 17:41:52,225 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,226 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,226 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 17:41:52,328 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,842 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 17:41:52,842 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,844 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,844 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 17:41:52,845 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,846 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 17:41:52,848 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,848 DEBUG   SenderThread:10760 [sender.py:send():179] send: final
--2022-04-09 17:41:52,849 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 17:41:52,849 DEBUG   SenderThread:10760 [sender.py:send():179] send: footer
--2022-04-09 17:41:52,850 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,850 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 17:41:52,947 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,947 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,948 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,049 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,050 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,051 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 45730
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,153 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,153 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,155 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,256 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,257 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,258 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,360 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,361 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,362 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,464 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,465 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,466 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,502 INFO    Thread-33 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:53,504 INFO    Thread-29 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:41:53,512 INFO    Thread-32 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:53,524 INFO    Thread-31 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:41:53,568 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,568 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,569 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,671 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,672 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,673 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,775 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,776 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,777 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,879 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,879 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,881 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,983 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,983 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,984 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,033 INFO    Thread-30 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:54,086 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,087 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,088 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,190 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,190 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,192 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,294 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,294 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,294 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:54,295 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,297 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 17:41:54,299 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 17:41:54,302 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 17:41:54,302 INFO    HandlerThread:10760 [handler.py:finish():638] shutting down handler
--2022-04-09 17:41:54,849 INFO    WriterThread:10760 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [sender.py:finish():933] shutting down sender
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:55,308 INFO    MainThread:10760 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 17:41:55,309 INFO    MainThread:10760 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 17:41:55,310 INFO    MainThread:10760 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 17:41:55,323 INFO    MainThread:10760 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-deleted file mode 100644
-index 2ea4289..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-+++ /dev/null
-@@ -1,73 +0,0 @@
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():418] starting backend
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb b/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
-deleted file mode 100644
-index c939775..0000000
-Binary files a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py b/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml b/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/config.yaml b/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-deleted file mode 100644
-index 0b2ef04..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 24
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/diff.patch b/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-deleted file mode 100644
-index a6f8b6d..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-+++ /dev/null
-@@ -1,634 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e11eb21 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,302 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..a3e7597 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..453b7bc 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b2d6ded 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_175151-z44hpswp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/output.log b/wandb/run-20220409_175151-z44hpswp/files/output.log
-deleted file mode 100644
-index 2224687..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/output.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--translation model saved in checkpoint
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt b/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-deleted file mode 100644
-index e3bc5e0..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:21:52.829321",
--    "startedAt": "2022-04-09T12:21:51.786614",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=24",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-deleted file mode 100644
-index 4d8b4c3..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 107.22583770751953, "_runtime": 695, "_timestamp": 1649507606, "_step": 28, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log b/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-deleted file mode 100644
-index 552d2f2..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,620 +0,0 @@
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,796 DEBUG   MainThread:14720 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send():179] send: header
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,171 DEBUG   SenderThread:14720 [sender.py:send():179] send: run
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,825 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:52,827 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():39] meta init
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():53] meta init done
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:probe():210] probe
--2022-04-09 17:51:52,837 DEBUG   HandlerThread:14720 [meta.py:_setup_git():200] setup git
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_save_code():89] save code
--2022-04-09 17:51:52,876 DEBUG   HandlerThread:14720 [meta.py:_save_code():110] save code done
--2022-04-09 17:51:52,877 DEBUG   HandlerThread:14720 [meta.py:_save_patches():127] save patches
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():57] save pip
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_conda():78] save conda
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:probe():252] probe done
--2022-04-09 17:51:54,261 DEBUG   SenderThread:14720 [sender.py:send():179] send: files
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,272 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:51:54,272 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: config
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:09,721 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:09,721 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:21,569 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:25,148 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:25,149 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:52,213 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,140 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:56,140 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:11,596 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:11,597 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:23,054 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:27,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:27,074 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:42,499 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:42,500 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:53,596 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:57,929 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:57,929 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:59,413 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:59,414 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:13,359 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:13,359 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,344 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:54:20,345 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:24,527 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:28,793 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:28,793 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:44,227 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:44,227 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:55,062 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:59,653 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:59,653 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:11,338 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:11,339 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:15,098 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:15,099 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:25,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:30,519 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:30,519 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:45,955 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:45,956 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:56,468 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:01,589 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:17,078 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:17,078 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:27,343 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:56:32,522 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:32,522 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:47,961 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:47,961 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:57,925 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:03,390 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:03,390 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:18,853 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:18,853 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:28,552 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:34,280 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:34,280 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:49,734 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:49,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:59,325 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,341 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:05,342 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:20,790 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:20,790 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:29,955 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:36,214 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:36,214 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:51,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:51,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:00,845 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:07,147 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:07,147 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:22,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:22,588 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:31,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:38,008 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:38,008 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:53,449 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:53,450 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:02,140 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:08,884 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:08,884 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:13,617 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:13,618 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:24,366 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:24,367 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:32,786 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:39,806 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:39,806 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,224 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:55,225 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,715 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:00,716 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:03,610 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:10,649 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:10,649 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:26,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:26,073 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:34,217 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:41,491 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:41,492 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,993 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:43,994 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:56,918 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:56,918 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:04,763 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:12,340 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:12,340 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:35,408 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:43,201 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:43,201 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:44,434 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:44,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:58,647 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:58,647 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:06,291 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:14,117 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:14,117 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,051 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:26,052 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:29,557 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:29,559 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:36,939 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:03:42,324 INFO    MainThread:14720 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:03:43,079 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:43,080 DEBUG   SenderThread:14720 [sender.py:send():179] send: telemetry
--2022-04-09 18:03:43,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:43,580 DEBUG   SenderThread:14720 [sender.py:send():179] send: exit
--2022-04-09 18:03:43,580 INFO    SenderThread:14720 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:03:43,581 INFO    SenderThread:14720 [sender.py:send_exit():295] send defer
--2022-04-09 18:03:43,581 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:43,582 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,583 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:03:43,583 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:03:43,584 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 48639
--}
--
--2022-04-09 18:03:43,585 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,586 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:03:43,657 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,657 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:43,658 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,658 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:03:43,660 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,660 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:03:43,686 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:44,248 INFO    SenderThread:14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt requirements.txt
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log output.log
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml config.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch diff.patch
--2022-04-09 18:03:44,251 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:03:44,253 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:03:44,253 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,254 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,258 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:03:44,260 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,260 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:03:44,261 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,261 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:03:44,261 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,261 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:03:44,361 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,907 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:03:44,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,908 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,908 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:03:44,909 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,909 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:03:44,910 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,910 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: final
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: footer
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,911 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:03:45,010 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,011 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,012 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,115 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,116 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,117 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,219 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,219 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,221 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,323 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,323 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,325 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,427 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,427 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,428 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,466 INFO    Thread-54 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 18:03:45,472 INFO    Thread-52 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 18:03:45,476 INFO    Thread-53 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:45,530 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,531 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,532 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,636 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,738 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,739 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,740 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,842 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,842 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,844 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,946 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,946 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,948 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,050 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,051 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,053 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,155 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,156 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,157 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,184 INFO    Thread-56 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:46,188 INFO    Thread-55 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:46,259 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,259 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,261 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,363 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,364 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,365 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,468 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,469 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,469 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:46,470 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,472 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:03:46,474 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:03:46,477 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:03:46,478 INFO    HandlerThread:14720 [handler.py:finish():638] shutting down handler
--2022-04-09 18:03:46,911 INFO    WriterThread:14720 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 18:03:47,469 INFO    SenderThread:14720 [sender.py:finish():933] shutting down sender
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:47,483 INFO    MainThread:14720 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:03:47,484 INFO    MainThread:14720 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:03:47,485 INFO    MainThread:14720 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:03:47,525 INFO    MainThread:14720 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug.log b/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-deleted file mode 100644
-index bb769fe..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-+++ /dev/null
-@@ -1,140 +0,0 @@
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'z44hpswp', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-z44hpswp.yaml', 'start_method': 'thread'}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug.log
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 24, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():418] starting backend
--2022-04-09 17:51:51,793 INFO    MainThread:14720 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
-diff --git a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb b/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
-deleted file mode 100644
-index 55f1aff..0000000
-Binary files a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py b/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml b/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml b/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-deleted file mode 100644
-index 194d831..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch b/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-deleted file mode 100644
-index 979dcc5..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-+++ /dev/null
-@@ -1,645 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..42fbde8 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,313 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..371ace5 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..a6d9884 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..705068b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z
--\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/output.log b/wandb/run-20220409_180353-vjrenr4z/files/output.log
-deleted file mode 100644
-index a2bf91c..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/output.log
-+++ /dev/null
-@@ -1,102 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--translation model saved in checkpoint
--{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--translation model saved in checkpoint
--{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--translation model saved in checkpoint
--{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--translation model saved in checkpoint
--{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--translation model saved in checkpoint
--{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--translation model saved in checkpoint
--{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--translation model saved in checkpoint
--{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--translation model saved in checkpoint
--{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--translation model saved in checkpoint
--{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--translation model saved in checkpoint
--{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--translation model saved in checkpoint
--{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--translation model saved in checkpoint
--{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--translation model saved in checkpoint
--{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--translation model saved in checkpoint
--{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--translation model saved in checkpoint
--{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--translation model saved in checkpoint
--{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--translation model saved in checkpoint
--{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--translation model saved in checkpoint
--{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--translation model saved in checkpoint
--{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--translation model saved in checkpoint
--{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--translation model saved in checkpoint
--{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--translation model saved in checkpoint
--{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--translation model saved in checkpoint
--{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--translation model saved in checkpoint
--{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--translation model saved in checkpoint
--{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--translation model saved in checkpoint
--{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--translation model saved in checkpoint
--{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--translation model saved in checkpoint
--{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--translation model saved in checkpoint
--{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--translation model saved in checkpoint
--{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--translation model saved in checkpoint
--{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--translation model saved in checkpoint
--{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--translation model saved in checkpoint
--{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--translation model saved in checkpoint
--{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--translation model saved in checkpoint
--{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt b/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-deleted file mode 100644
-index 3e24107..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:33:55.138080",
--    "startedAt": "2022-04-09T12:33:53.912960",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=40",
--        "--nhead=4",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-deleted file mode 100644
-index dbd5bb9..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 571.8498382568359, "_runtime": 1394, "_timestamp": 1649509027, "_step": 47, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-deleted file mode 100644
-index 6ac5722..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,809 +0,0 @@
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,947 DEBUG   MainThread:18842 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 DEBUG   SenderThread:18842 [sender.py:send():179] send: header
--2022-04-09 18:03:53,957 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:53,958 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:54,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: run
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:55,130 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():39] meta init
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():53] meta init done
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:probe():210] probe
--2022-04-09 18:03:55,146 DEBUG   HandlerThread:18842 [meta.py:_setup_git():200] setup git
--2022-04-09 18:03:55,213 DEBUG   HandlerThread:18842 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:03:55,214 DEBUG   HandlerThread:18842 [meta.py:_save_code():89] save code
--2022-04-09 18:03:55,241 DEBUG   HandlerThread:18842 [meta.py:_save_code():110] save code done
--2022-04-09 18:03:55,242 DEBUG   HandlerThread:18842 [meta.py:_save_patches():127] save patches
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():57] save pip
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_conda():78] save conda
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,710 DEBUG   HandlerThread:18842 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:03:56,711 DEBUG   HandlerThread:18842 [meta.py:probe():252] probe done
--2022-04-09 18:03:56,713 DEBUG   SenderThread:18842 [sender.py:send():179] send: files
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,723 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:56,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: config
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:12,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:12,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:23,959 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:27,637 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:27,637 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:43,070 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:43,071 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:54,578 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:58,609 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:58,609 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,096 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:14,096 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:25,318 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:05:29,536 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:29,536 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,041 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:45,042 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:55,878 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:00,385 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:00,385 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,115 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:12,116 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:15,812 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:15,812 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:26,509 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:31,252 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:31,252 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:46,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:46,699 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:57,088 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:02,128 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:02,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:17,560 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:17,560 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:27,788 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:33,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:33,039 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:58,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:03,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:03,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:10,495 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:10,496 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,773 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:16,774 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:19,358 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:19,358 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:29,127 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:34,827 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:34,827 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:50,258 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:50,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:59,791 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:05,625 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:05,625 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:21,079 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:21,079 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:30,544 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:36,425 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:36,426 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,629 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:37,630 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:51,758 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:51,758 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:01,192 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:07,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:22,576 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:22,576 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,752 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:37,928 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:37,928 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:02,406 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:08,610 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:08,610 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:23,966 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:23,966 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:33,001 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:39,600 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:39,600 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:54,944 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:54,944 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:03,627 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:10,280 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:10,280 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:25,635 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:25,635 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:34,297 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:40,989 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:40,989 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:56,322 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:56,323 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:05,226 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:11,687 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:11,687 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:27,035 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:27,035 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:35,749 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:42,474 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:42,475 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:57,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:06,507 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:13,240 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:13,240 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,985 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:26,986 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:28,667 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:28,668 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:37,148 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:44,310 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:44,310 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:59,666 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:59,666 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:07,695 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:14,998 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:14,998 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:30,334 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:30,334 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:38,429 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:45,673 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:45,673 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:01,020 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:01,020 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:09,031 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:16,349 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:16,349 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:39,689 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:47,261 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:47,261 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:02,605 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:02,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:10,351 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:17,935 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:17,935 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:33,308 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:33,308 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,998 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:44,097 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:44,098 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:48,657 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:48,817 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:11,869 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:20,065 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:20,065 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,258 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:50,780 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:50,780 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:06,176 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:06,176 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:12,884 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:21,533 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:21,533 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:43,542 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:52,222 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:52,222 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:07,575 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:07,575 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:14,395 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:22,919 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:22,920 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:38,284 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:38,284 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:44,947 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:53,719 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:53,719 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:09,154 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:09,154 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:15,554 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:24,513 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:24,513 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,048 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:32,049 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:39,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:39,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:46,176 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:55,292 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:55,292 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:10,678 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:10,679 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:16,761 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:26,337 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:26,337 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:41,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:41,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:43,842 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:43,843 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:47,574 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:57,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:57,038 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:12,473 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:12,473 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:18,151 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:27,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:27,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:43,266 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:43,266 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:48,907 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:58,729 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:58,729 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,447 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:03,448 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:14,167 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:14,167 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:19,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:29,519 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:29,520 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:44,877 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:44,877 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:50,128 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:00,259 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:00,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:20,792 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:30,948 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:30,948 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,976 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:38,977 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:46,374 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:46,374 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:51,548 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:01,722 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:01,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:03,261 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:03,262 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:17,072 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:17,072 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:22,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:26:32,410 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:32,411 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:47,810 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:47,810 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:52,753 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,241 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:03,241 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:18,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:18,700 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:23,342 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:34,106 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:34,107 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
--2022-04-09 18:27:39,696 INFO    MainThread:18842 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:27:39,697 INFO    MainThread:18842 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:27:40,003 DEBUG   SenderThread:18842 [sender.py:send():179] send: telemetry
--2022-04-09 18:27:40,004 DEBUG   SenderThread:18842 [sender.py:send():179] send: exit
--2022-04-09 18:27:40,005 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,005 INFO    SenderThread:18842 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:27:40,006 INFO    SenderThread:18842 [sender.py:send_exit():295] send defer
--2022-04-09 18:27:40,006 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,008 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,008 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:27:40,008 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,010 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:27:40,011 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,011 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:40,067 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,067 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:27:40,069 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,069 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:27:40,110 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:40,461 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:27:40,462 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,463 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,464 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:27:40,464 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,465 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,465 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:27:40,466 INFO    SenderThread:18842 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:27:40,566 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:41,202 INFO    SenderThread:18842 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:27:41,205 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt requirements.txt
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log output.log
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:27:41,207 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml config.yaml
--2022-04-09 18:27:41,211 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch diff.patch
--2022-04-09 18:27:41,220 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:27:41,223 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:27:41,224 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,225 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,225 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:27:41,225 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,226 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,226 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:27:41,230 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:41,231 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:27:41,232 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,232 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:27:41,232 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,232 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:27:41,332 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,915 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:27:41,915 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,917 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,917 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:27:41,918 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,919 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:27:41,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,921 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:27:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: final
--2022-04-09 18:27:41,922 DEBUG   SenderThread:18842 [sender.py:send():179] send: footer
--2022-04-09 18:27:41,923 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,923 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:27:42,024 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,024 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,025 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,127 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,129 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,231 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,231 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,233 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,335 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,335 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,336 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,438 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,439 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,440 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,542 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,542 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,544 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,592 INFO    Thread-73 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:27:42,594 INFO    Thread-71 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:27:42,599 INFO    Thread-75 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:42,601 INFO    Thread-72 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:42,602 INFO    Thread-74 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:42,645 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,645 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,646 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,747 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,748 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,749 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,851 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,851 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,852 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:42,853 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,855 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:27:42,857 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:27:42,860 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:27:42,861 INFO    HandlerThread:18842 [handler.py:finish():638] shutting down handler
--2022-04-09 18:27:42,922 INFO    WriterThread:18842 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:27:43,852 INFO    SenderThread:18842 [sender.py:finish():933] shutting down sender
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:27:43,868 INFO    MainThread:18842 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:27:43,884 INFO    MainThread:18842 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-deleted file mode 100644
-index 55b000f..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-+++ /dev/null
-@@ -1,230 +0,0 @@
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'vjrenr4z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml', 'start_method': 'thread'}
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:53,921 INFO    MainThread:18842 [wandb_init.py:init():418] starting backend
--2022-04-09 18:03:53,941 INFO    MainThread:18842 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:03:53,943 INFO    MainThread:18842 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
-diff --git a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb b/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
-deleted file mode 100644
-index 2a205f7..0000000
-Binary files a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py b/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml b/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_182749-paufev36/files/config.yaml b/wandb/run-20220409_182749-paufev36/files/config.yaml
-deleted file mode 100644
-index c4a0d20..0000000
---- a/wandb/run-20220409_182749-paufev36/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_182749-paufev36/files/diff.patch b/wandb/run-20220409_182749-paufev36/files/diff.patch
-deleted file mode 100644
-index 17f6c34..0000000
---- a/wandb/run-20220409_182749-paufev36/files/diff.patch
-+++ /dev/null
-@@ -1,694 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e8bd4e3 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,362 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--+{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--+{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--+{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--+{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--+{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--+{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--+{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--+{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--+{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--+{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--+{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--+{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--+{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--+{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--+{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--+{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--+{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--+{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--+{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--+{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--+{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--+{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--+{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--+{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--+{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--+{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--+{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--+{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--+{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--+{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--+{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--+{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--+{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--+{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--+{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--+{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--+{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--+{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--+{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--+{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--+{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--+{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--+{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--+{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--+{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--+{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..6163657 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..7d0f5dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f11d588 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_182749-paufev36
--\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/output.log b/wandb/run-20220409_182749-paufev36/files/output.log
-deleted file mode 100644
-index 8a30e30..0000000
---- a/wandb/run-20220409_182749-paufev36/files/output.log
-+++ /dev/null
-@@ -1,55 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.115720272064209, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 202.97476196289062, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 151.204345703125, "time": 62}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Exception in thread Thread-16:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220409_182749-paufev36/files/requirements.txt b/wandb/run-20220409_182749-paufev36/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_182749-paufev36/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json b/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-deleted file mode 100644
-index ee6c1fa..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:57:50.039943",
--    "startedAt": "2022-04-09T12:57:49.399103",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json b/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-deleted file mode 100644
-index 6be8521..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 287.689208984375, "_runtime": 137, "_timestamp": 1649509206, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log b/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-deleted file mode 100644
-index ade12de..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-+++ /dev/null
-@@ -1,141 +0,0 @@
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,431 DEBUG   MainThread:25755 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send():179] send: header
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,435 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:49,589 DEBUG   SenderThread:25755 [sender.py:send():179] send: run
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:50,037 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():39] meta init
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():53] meta init done
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:probe():210] probe
--2022-04-09 18:27:50,045 DEBUG   HandlerThread:25755 [meta.py:_setup_git():200] setup git
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_save_code():89] save code
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_code():110] save code done
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_patches():127] save patches
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_pip():57] save pip
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_conda():78] save conda
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:probe():252] probe done
--2022-04-09 18:27:51,519 DEBUG   SenderThread:25755 [sender.py:send():179] send: files
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,530 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:51,530 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:51,872 DEBUG   SenderThread:25755 [sender.py:send():179] send: config
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:06,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:18,996 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,208 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:22,208 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:49,672 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:53,002 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:53,002 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,936 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:00,937 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:08,453 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:08,454 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:20,345 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:23,787 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:23,787 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:39,186 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:39,186 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:51,270 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:54,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:54,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:10,343 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:30:10,343 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug.log b/wandb/run-20220409_182749-paufev36/logs/debug.log
-deleted file mode 100644
-index 7b0f79c..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug.log
-+++ /dev/null
-@@ -1,92 +0,0 @@
--2022-04-09 18:27:49,403 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'paufev36', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-paufev36.yaml', 'start_method': 'thread'}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():418] starting backend
--2022-04-09 18:27:49,427 INFO    MainThread:25755 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:27:49,429 INFO    MainThread:25755 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb b/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
-deleted file mode 100644
-index 70babdb..0000000
-Binary files a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb and /dev/null differ
-diff --git a/wandb/sweep-1t9pc38r/config-paufev36.yaml b/wandb/sweep-1t9pc38r/config-paufev36.yaml
-deleted file mode 100644
-index da3e8b2..0000000
---- a/wandb/sweep-1t9pc38r/config-paufev36.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml b/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-deleted file mode 100644
-index d68afea..0000000
---- a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml b/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-deleted file mode 100644
-index cc3235e..0000000
---- a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml b/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-deleted file mode 100644
-index 24fc0f6..0000000
---- a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml b/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-deleted file mode 100644
-index eeb3936..0000000
---- a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml b/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-deleted file mode 100644
-index f88591e..0000000
---- a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-abict4v2.yaml b/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-deleted file mode 100644
-index 1b97c5e..0000000
---- a/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 20
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml b/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-deleted file mode 100644
-index 426c8ac..0000000
---- a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml b/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-deleted file mode 100644
-index caf5f78..0000000
---- a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml b/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-deleted file mode 100644
-index 6b7d3c1..0000000
---- a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml b/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-deleted file mode 100644
-index 8f11b7e..0000000
---- a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml b/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-deleted file mode 100644
-index d3a2560..0000000
---- a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml b/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-deleted file mode 100644
-index 403014d..0000000
---- a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 512
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml b/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-deleted file mode 100644
-index d1bf3d8..0000000
---- a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 40
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml b/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-deleted file mode 100644
-index 258ae0c..0000000
---- a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml b/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-deleted file mode 100644
-index dbe827a..0000000
---- a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml b/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-deleted file mode 100644
-index 3aeb285..0000000
---- a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml b/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-deleted file mode 100644
-index ccb6734..0000000
---- a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-gjih072d.yaml b/wandb/sweep-yoroy32u/config-gjih072d.yaml
-deleted file mode 100644
-index 73e8e4c..0000000
---- a/wandb/sweep-yoroy32u/config-gjih072d.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml b/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-deleted file mode 100644
-index 9d822c0..0000000
---- a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml b/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-deleted file mode 100644
-index f0bd5df..0000000
---- a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-uh7twoim.yaml b/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-deleted file mode 100644
-index 508d9e2..0000000
---- a/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml b/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-deleted file mode 100644
-index 83311a7..0000000
---- a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml b/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-deleted file mode 100644
-index 4f6dc35..0000000
---- a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--lambd:
--  value: 0.4
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-padai7jf.yaml b/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-deleted file mode 100644
-index 9b19315..0000000
---- a/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--lambd:
--  value: 0.55
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml b/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-deleted file mode 100644
-index 8a8a9b2..0000000
---- a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 256
--epochs:
--  value: 24
--lambd:
--  value: 0.2
--nhead:
--  value: 2
--nlayers:
--  value: 4
diff --git a/wandb/run-20220416_013009-2m8v6ch7/files/output.log b/wandb/run-20220416_013009-2m8v6ch7/files/output.log
deleted file mode 100644
index 21faf62..0000000
--- a/wandb/run-20220416_013009-2m8v6ch7/files/output.log
+++ /dev/null
@@ -1,25 +0,0 @@
-
-train_translation.py --load=0
-Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
-Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
-- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-/home/ivlabs/context_enhancement/context_new/new/context_enhancement/train_translation.py:275: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
-  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-{"epoch": 0, "step": 0, "loss": 7.128603458404541, "time": 5}
-{"epoch": 0, "step": 5, "loss": 156.04449462890625, "time": 6}
-{"epoch": 0, "step": 10, "loss": 154.7353515625, "time": 7}
-translation model saved in checkpoint
-{"epoch": 1, "step": 15, "loss": 138.67442321777344, "time": 70}
-{"epoch": 1, "step": 20, "loss": 75.6456298828125, "time": 70}
-translation model saved in checkpoint
-{"epoch": 2, "step": 25, "loss": 64.19247436523438, "time": 116}
-{"epoch": 2, "step": 30, "loss": 65.62056732177734, "time": 116}
-{"epoch": 2, "step": 35, "loss": 66.36638641357422, "time": 117}
-translation model saved in checkpoint
-{"epoch": 3, "step": 40, "loss": 77.29269409179688, "time": 164}
-{"epoch": 3, "step": 45, "loss": 68.74011993408203, "time": 165}
-translation model saved in checkpoint
-{"epoch": 4, "step": 50, "loss": 74.82659912109375, "time": 182}
-{"epoch": 4, "step": 55, "loss": 77.39452362060547, "time": 183}
-translation model saved in checkpoint
\ No newline at end of file
diff --git a/wandb/run-20220416_013009-2m8v6ch7/files/requirements.txt b/wandb/run-20220416_013009-2m8v6ch7/files/requirements.txt
deleted file mode 100644
index 5ddce70..0000000
--- a/wandb/run-20220416_013009-2m8v6ch7/files/requirements.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-aiohttp==3.8.1
-aiosignal==1.2.0
-antlr4-python3-runtime==4.8
-async-timeout==4.0.2
-asynctest==0.13.0
-attrs==21.4.0
-backcall==0.2.0
-bitarray==2.4.1
-blessings==1.7
-brotlipy==0.7.0
-certifi==2021.10.8
-cffi==1.15.0
-charset-normalizer==2.0.12
-click==8.0.4
-colorama==0.4.4
-configparser==5.2.0
-cryptography==36.0.0
-cython==0.29.28
-datasets==1.16.1
-debugpy==1.6.0
-decorator==5.1.1
-dill==0.3.4
-docker-pycreds==0.4.0
-entrypoints==0.4
-fairseq==1.0.0a0
-fastbpe==0.1.0
-filelock==3.6.0
-frozenlist==1.3.0
-fsspec==2022.2.0
-gitdb==4.0.9
-gitpython==3.1.27
-gpustat==0.6.0
-huggingface-hub==0.4.0
-hydra-core==1.0.7
-idna==3.3
-importlib-metadata==4.11.3
-importlib-resources==5.6.0
-ipykernel==6.12.1
-ipython==7.32.0
-jedi==0.18.1
-joblib==1.1.0
-jupyter-client==7.2.2
-jupyter-core==4.9.2
-matplotlib-inline==0.1.3
-mkl-fft==1.3.1
-mkl-random==1.2.2
-mkl-service==2.4.0
-mock==4.0.3
-multidict==6.0.2
-multiprocess==0.70.12.2
-nest-asyncio==1.5.5
-numpy==1.21.5
-nvidia-ml-py3==7.352.0
-omegaconf==2.0.6
-packaging==21.3
-pandas==1.3.5
-parso==0.8.3
-pathtools==0.1.2
-pexpect==4.8.0
-pickleshare==0.7.5
-pillow==9.0.1
-pip==21.2.2
-portalocker==2.4.0
-promise==2.3
-prompt-toolkit==3.0.29
-protobuf==3.19.4
-psutil==5.9.0
-ptyprocess==0.7.0
-pyarrow==7.0.0
-pycparser==2.21
-pygments==2.11.2
-pyopenssl==22.0.0
-pyparsing==3.0.7
-pysocks==1.7.1
-python-dateutil==2.8.2
-pytz==2022.1
-pyyaml==6.0
-pyzmq==22.3.0
-regex==2022.3.15
-requests==2.27.1
-sacrebleu==2.0.0
-sacremoses==0.0.49
-sentry-sdk==1.5.8
-setuptools==58.0.4
-shortuuid==1.0.8
-six==1.16.0
-smmap==5.0.0
-subprocess32==3.5.4
-subword-nmt==0.3.8
-tabulate==0.8.9
-tokenizers==0.10.3
-torch==1.11.0
-torchaudio==0.11.0
-torchtext==0.12.0
-torchvision==0.12.0
-tornado==6.1
-tqdm==4.63.1
-traitlets==5.1.1
-transformers==4.14.1
-typing-extensions==4.1.1
-urllib3==1.26.9
-wandb==0.10.31
-wcwidth==0.2.5
-wheel==0.37.1
-xxhash==3.0.0
-yarl==1.7.2
-zipp==3.7.0
\ No newline at end of file
diff --git a/wandb/run-20220416_013009-2m8v6ch7/files/wandb-metadata.json b/wandb/run-20220416_013009-2m8v6ch7/files/wandb-metadata.json
deleted file mode 100644
index dbffe1f..0000000
--- a/wandb/run-20220416_013009-2m8v6ch7/files/wandb-metadata.json
+++ /dev/null
@@ -1,26 +0,0 @@
-{
-    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
-    "python": "3.7.11",
-    "heartbeatAt": "2022-04-15T20:00:10.396365",
-    "startedAt": "2022-04-15T20:00:09.148879",
-    "docker": null,
-    "gpu": "NVIDIA GeForce GTX 1080 Ti",
-    "gpu_count": 2,
-    "cpu_count": 8,
-    "cuda": null,
-    "args": [
-        "--load=0"
-    ],
-    "state": "running",
-    "program": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement/train_translation.py",
-    "codePath": "train_translation.py",
-    "git": {
-        "remote": "https://github.com/IvLabs/context_enhancement.git",
-        "commit": "3f7c03274d50f816db3079adcb4d4125620373b6"
-    },
-    "email": "aneeshashetye@gmail.com",
-    "root": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement",
-    "host": "hubble-02",
-    "username": "ivlabs",
-    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
-}
diff --git a/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json b/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
deleted file mode 100644
index 1fcb966..0000000
--- a/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"epoch_loss": 79.08950964609782, "_runtime": 195, "_timestamp": 1650053004, "_step": 5, "bleu_score": 0.0}
\ No newline at end of file
diff --git a/wandb/run-20220416_013009-2m8v6ch7/logs/debug-internal.log b/wandb/run-20220416_013009-2m8v6ch7/logs/debug-internal.log
deleted file mode 100644
index 406d1ee..0000000
--- a/wandb/run-20220416_013009-2m8v6ch7/logs/debug-internal.log
+++ /dev/null
@@ -1,388 +0,0 @@
-2022-04-16 01:30:09,156 INFO    wandb_internal:3047 [internal.py:wandb_internal():91] W&B internal server running at pid: 3047, started at: 2022-04-16 01:30:09.155690
-2022-04-16 01:30:09,157 INFO    MainThread:3047 [backend.py:ensure_launched():137] started backend process with pid: 0
-2022-04-16 01:30:09,158 INFO    MainThread:3047 [wandb_init.py:init():423] backend started and connected
-2022-04-16 01:30:09,158 DEBUG   MainThread:3047 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
-2022-04-16 01:30:09,159 INFO    MainThread:3047 [wandb_init.py:init():465] updated telemetry
-2022-04-16 01:30:09,160 INFO    MainThread:3047 [wandb_init.py:init():484] communicating current version
-2022-04-16 01:30:09,160 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: check_version
-2022-04-16 01:30:09,160 INFO    WriterThread:3047 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/run-2m8v6ch7.wandb
-2022-04-16 01:30:09,161 DEBUG   SenderThread:3047 [sender.py:send():179] send: header
-2022-04-16 01:30:09,162 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: check_version
-2022-04-16 01:30:09,594 INFO    MainThread:3047 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-16 01:30:09,595 INFO    MainThread:3047 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-16 01:30:09,595 DEBUG   SenderThread:3047 [sender.py:send():179] send: run
-2022-04-16 01:30:10,393 INFO    MainThread:3047 [wandb_init.py:init():522] starting run threads in backend
-2022-04-16 01:30:10,393 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: run_start
-2022-04-16 01:30:10,394 INFO    SenderThread:3047 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files
-2022-04-16 01:30:10,394 INFO    SenderThread:3047 [sender.py:_start_run_threads():707] run started: 2m8v6ch7 with start time 1650052809
-2022-04-16 01:30:10,394 DEBUG   SenderThread:3047 [sender.py:send():179] send: summary
-2022-04-16 01:30:10,394 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:30:10,396 DEBUG   HandlerThread:3047 [meta.py:__init__():39] meta init
-2022-04-16 01:30:10,396 DEBUG   HandlerThread:3047 [meta.py:__init__():53] meta init done
-2022-04-16 01:30:10,396 DEBUG   HandlerThread:3047 [meta.py:probe():210] probe
-2022-04-16 01:30:10,402 DEBUG   HandlerThread:3047 [meta.py:_setup_git():200] setup git
-2022-04-16 01:30:10,417 DEBUG   HandlerThread:3047 [meta.py:_setup_git():207] setup git done
-2022-04-16 01:30:10,417 DEBUG   HandlerThread:3047 [meta.py:_save_code():89] save code
-2022-04-16 01:30:10,424 DEBUG   HandlerThread:3047 [meta.py:_save_code():110] save code done
-2022-04-16 01:30:10,424 DEBUG   HandlerThread:3047 [meta.py:_save_patches():127] save patches
-2022-04-16 01:30:10,560 DEBUG   HandlerThread:3047 [meta.py:_save_patches():169] save patches done
-2022-04-16 01:30:10,560 DEBUG   HandlerThread:3047 [meta.py:_save_pip():57] save pip
-2022-04-16 01:30:10,560 DEBUG   HandlerThread:3047 [meta.py:_save_pip():71] save pip done
-2022-04-16 01:30:10,560 DEBUG   HandlerThread:3047 [meta.py:_save_conda():78] save conda
-2022-04-16 01:30:11,399 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/diff.patch
-2022-04-16 01:30:11,400 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/requirements.txt
-2022-04-16 01:30:11,401 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/conda-environment.yaml
-2022-04-16 01:30:11,402 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/code/train_translation.py
-2022-04-16 01:30:11,402 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:30:11,402 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/code
-2022-04-16 01:30:13,396 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/conda-environment.yaml
-2022-04-16 01:30:13,402 DEBUG   HandlerThread:3047 [meta.py:_save_conda():86] save conda done
-2022-04-16 01:30:13,402 DEBUG   HandlerThread:3047 [meta.py:probe():252] probe done
-2022-04-16 01:30:13,405 DEBUG   SenderThread:3047 [sender.py:send():179] send: files
-2022-04-16 01:30:13,406 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-16 01:30:13,407 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-16 01:30:13,407 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-16 01:30:13,412 INFO    MainThread:3047 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-16 01:30:13,415 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:30:13,415 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:30:13,415 INFO    MainThread:3047 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-16 01:30:13,417 INFO    MainThread:3047 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-16 01:30:13,418 INFO    MainThread:3047 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-16 01:30:13,418 INFO    MainThread:3047 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-16 01:30:13,418 INFO    MainThread:3047 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-16 01:30:14,156 DEBUG   SenderThread:3047 [sender.py:send():179] send: config
-2022-04-16 01:30:14,398 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-metadata.json
-2022-04-16 01:30:14,398 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:30:15,121 INFO    Thread-14 :3047 [upload_job.py:push():133] Uploaded file /tmp/tmp_xxxs0wowandb/1u7lv5wr-wandb-metadata.json
-2022-04-16 01:30:15,209 INFO    Thread-17 :3047 [upload_job.py:push():133] Uploaded file /tmp/tmp_xxxs0wowandb/4wbr9a95-code/train_translation.py
-2022-04-16 01:30:16,138 INFO    Thread-22 :3047 [upload_job.py:push():133] Uploaded file /tmp/tmp_xxxs0wowandb/1f5szweq-diff.patch
-2022-04-16 01:30:16,398 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:30:16,398 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/config.yaml
-2022-04-16 01:30:18,399 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:30:25,465 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:30:27,470 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:30:27,660 DEBUG   SenderThread:3047 [sender.py:send():179] send: history
-2022-04-16 01:30:27,660 DEBUG   SenderThread:3047 [sender.py:send():179] send: summary
-2022-04-16 01:30:27,660 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:30:28,591 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:30:29,157 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:30:29,157 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:30:39,019 DEBUG   SenderThread:3047 [sender.py:send():179] send: stats
-2022-04-16 01:30:43,595 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:30:44,867 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:30:44,867 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:31:00,710 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:31:00,710 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:31:09,489 DEBUG   SenderThread:3047 [sender.py:send():179] send: stats
-2022-04-16 01:31:16,370 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:31:16,370 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:31:24,719 DEBUG   SenderThread:3047 [sender.py:send():179] send: history
-2022-04-16 01:31:24,719 DEBUG   SenderThread:3047 [sender.py:send():179] send: summary
-2022-04-16 01:31:24,720 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:31:25,608 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:31:31,028 DEBUG   SenderThread:3047 [sender.py:send():179] send: history
-2022-04-16 01:31:31,029 DEBUG   SenderThread:3047 [sender.py:send():179] send: summary
-2022-04-16 01:31:31,029 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:31:31,609 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:31:31,610 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:31:32,032 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:31:32,032 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:31:40,142 DEBUG   SenderThread:3047 [sender.py:send():179] send: stats
-2022-04-16 01:31:43,612 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:31:47,765 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:31:47,765 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:32:03,456 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:32:03,456 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:32:17,464 DEBUG   SenderThread:3047 [sender.py:send():179] send: history
-2022-04-16 01:32:17,464 DEBUG   SenderThread:3047 [sender.py:send():179] send: summary
-2022-04-16 01:32:17,466 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:32:17,622 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:32:17,622 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:32:19,176 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:32:19,176 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:32:33,638 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:32:34,812 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:32:34,812 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:32:50,521 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:32:50,521 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:33:05,050 DEBUG   SenderThread:3047 [sender.py:send():179] send: history
-2022-04-16 01:33:05,050 DEBUG   SenderThread:3047 [sender.py:send():179] send: summary
-2022-04-16 01:33:05,052 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:33:05,647 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:33:05,647 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:33:06,206 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:33:06,206 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:33:09,011 DEBUG   SenderThread:3047 [sender.py:send():179] send: stats
-2022-04-16 01:33:19,651 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:33:21,889 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:33:21,889 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:33:23,662 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:33:24,091 DEBUG   SenderThread:3047 [sender.py:send():179] send: history
-2022-04-16 01:33:24,091 DEBUG   SenderThread:3047 [sender.py:send():179] send: summary
-2022-04-16 01:33:24,092 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:33:24,662 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:33:37,579 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:33:37,579 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:33:38,348 INFO    MainThread:3047 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2m8v6ch7
-2022-04-16 01:33:38,349 INFO    MainThread:3047 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
-2022-04-16 01:33:38,350 INFO    MainThread:3047 [wandb_run.py:_restore():1480] restore
-2022-04-16 01:33:38,674 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:33:38,941 DEBUG   SenderThread:3047 [sender.py:send():179] send: telemetry
-2022-04-16 01:33:38,943 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:33:38,943 DEBUG   SenderThread:3047 [sender.py:send():179] send: exit
-2022-04-16 01:33:38,944 INFO    SenderThread:3047 [sender.py:send_exit():287] handling exit code: 0
-2022-04-16 01:33:38,944 INFO    SenderThread:3047 [sender.py:send_exit():295] send defer
-2022-04-16 01:33:38,944 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:33:38,946 INFO    MainThread:3047 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 2
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1744922
-  total_bytes: 1744922
-}
-
-2022-04-16 01:33:38,948 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:33:38,948 INFO    HandlerThread:3047 [handler.py:handle_request_defer():141] handle defer: 0
-2022-04-16 01:33:38,949 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:33:38,949 INFO    SenderThread:3047 [sender.py:send_request_defer():304] handle sender defer: 0
-2022-04-16 01:33:38,949 INFO    SenderThread:3047 [sender.py:send_request_defer():342] send defer: 1
-2022-04-16 01:33:38,950 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:33:38,950 INFO    HandlerThread:3047 [handler.py:handle_request_defer():141] handle defer: 1
-2022-04-16 01:33:39,026 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:33:39,026 INFO    SenderThread:3047 [sender.py:send_request_defer():304] handle sender defer: 1
-2022-04-16 01:33:39,026 INFO    SenderThread:3047 [sender.py:send_request_defer():342] send defer: 2
-2022-04-16 01:33:39,027 DEBUG   SenderThread:3047 [sender.py:send():179] send: stats
-2022-04-16 01:33:39,027 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:33:39,027 INFO    HandlerThread:3047 [handler.py:handle_request_defer():141] handle defer: 2
-2022-04-16 01:33:39,027 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:33:39,027 INFO    SenderThread:3047 [sender.py:send_request_defer():304] handle sender defer: 2
-2022-04-16 01:33:39,027 INFO    SenderThread:3047 [sender.py:send_request_defer():342] send defer: 3
-2022-04-16 01:33:39,028 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:33:39,028 INFO    HandlerThread:3047 [handler.py:handle_request_defer():141] handle defer: 3
-2022-04-16 01:33:39,028 DEBUG   SenderThread:3047 [sender.py:send():179] send: summary
-2022-04-16 01:33:39,028 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:33:39,028 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:33:39,029 INFO    SenderThread:3047 [sender.py:send_request_defer():304] handle sender defer: 3
-2022-04-16 01:33:39,029 INFO    SenderThread:3047 [sender.py:send_request_defer():342] send defer: 4
-2022-04-16 01:33:39,029 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:33:39,029 INFO    HandlerThread:3047 [handler.py:handle_request_defer():141] handle defer: 4
-2022-04-16 01:33:39,029 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:33:39,029 INFO    SenderThread:3047 [sender.py:send_request_defer():304] handle sender defer: 4
-2022-04-16 01:33:39,048 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:33:39,675 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:33:39,675 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:33:39,793 INFO    SenderThread:3047 [sender.py:send_request_defer():342] send defer: 5
-2022-04-16 01:33:39,793 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:33:39,794 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:33:39,794 INFO    HandlerThread:3047 [handler.py:handle_request_defer():141] handle defer: 5
-2022-04-16 01:33:39,794 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:33:39,795 INFO    SenderThread:3047 [sender.py:send_request_defer():304] handle sender defer: 5
-2022-04-16 01:33:39,795 INFO    SenderThread:3047 [dir_watcher.py:finish():282] shutting down directory watcher
-2022-04-16 01:33:39,795 INFO    MainThread:3047 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 2
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1744922
-  total_bytes: 1744922
-}
-
-2022-04-16 01:33:39,897 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:33:40,675 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/config.yaml
-2022-04-16 01:33:40,677 INFO    SenderThread:3047 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files
-2022-04-16 01:33:40,677 INFO    SenderThread:3047 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/requirements.txt requirements.txt
-2022-04-16 01:33:40,678 INFO    SenderThread:3047 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-metadata.json wandb-metadata.json
-2022-04-16 01:33:40,678 INFO    SenderThread:3047 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log output.log
-2022-04-16 01:33:40,679 INFO    SenderThread:3047 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/conda-environment.yaml conda-environment.yaml
-2022-04-16 01:33:40,679 INFO    SenderThread:3047 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json wandb-summary.json
-2022-04-16 01:33:40,690 INFO    SenderThread:3047 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/config.yaml config.yaml
-2022-04-16 01:33:40,701 INFO    SenderThread:3047 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/diff.patch diff.patch
-2022-04-16 01:33:40,730 INFO    SenderThread:3047 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/code/train_translation.py code/train_translation.py
-2022-04-16 01:33:40,730 INFO    SenderThread:3047 [sender.py:send_request_defer():342] send defer: 6
-2022-04-16 01:33:40,731 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:33:40,732 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:33:40,733 INFO    HandlerThread:3047 [handler.py:handle_request_defer():141] handle defer: 6
-2022-04-16 01:33:40,734 INFO    MainThread:3047 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1744922
-  total_bytes: 1754733
-}
-
-2022-04-16 01:33:40,734 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:33:40,735 INFO    SenderThread:3047 [sender.py:send_request_defer():304] handle sender defer: 6
-2022-04-16 01:33:40,735 INFO    SenderThread:3047 [file_pusher.py:finish():176] shutting down file pusher
-2022-04-16 01:33:40,735 INFO    SenderThread:3047 [sender.py:send_request_defer():342] send defer: 7
-2022-04-16 01:33:40,736 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:33:40,737 INFO    HandlerThread:3047 [handler.py:handle_request_defer():141] handle defer: 7
-2022-04-16 01:33:40,737 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:33:40,737 INFO    SenderThread:3047 [sender.py:send_request_defer():304] handle sender defer: 7
-2022-04-16 01:33:40,835 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:33:41,471 INFO    SenderThread:3047 [sender.py:send_request_defer():342] send defer: 8
-2022-04-16 01:33:41,471 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:33:41,473 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:33:41,473 INFO    MainThread:3047 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1744922
-  total_bytes: 1754733
-}
-
-2022-04-16 01:33:41,474 INFO    HandlerThread:3047 [handler.py:handle_request_defer():141] handle defer: 8
-2022-04-16 01:33:41,475 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:33:41,475 INFO    SenderThread:3047 [sender.py:send_request_defer():304] handle sender defer: 8
-2022-04-16 01:33:41,476 INFO    SenderThread:3047 [sender.py:send_request_defer():342] send defer: 9
-2022-04-16 01:33:41,477 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:33:41,478 DEBUG   SenderThread:3047 [sender.py:send():179] send: final
-2022-04-16 01:33:41,478 INFO    HandlerThread:3047 [handler.py:handle_request_defer():141] handle defer: 9
-2022-04-16 01:33:41,478 DEBUG   SenderThread:3047 [sender.py:send():179] send: footer
-2022-04-16 01:33:41,479 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:33:41,480 INFO    SenderThread:3047 [sender.py:send_request_defer():304] handle sender defer: 9
-2022-04-16 01:33:41,575 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:33:41,576 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:33:41,576 INFO    MainThread:3047 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1744922
-  total_bytes: 1754733
-}
-
-2022-04-16 01:33:41,678 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:33:41,679 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:33:41,680 INFO    MainThread:3047 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1744922
-  total_bytes: 1754733
-}
-
-2022-04-16 01:33:41,781 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:33:41,782 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:33:41,783 INFO    MainThread:3047 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1754733
-  total_bytes: 1754733
-}
-
-2022-04-16 01:33:41,885 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:33:41,886 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:33:41,887 INFO    MainThread:3047 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1754733
-  total_bytes: 1754733
-}
-
-2022-04-16 01:33:41,989 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:33:41,990 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:33:41,991 INFO    MainThread:3047 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1754733
-  total_bytes: 1754733
-}
-
-2022-04-16 01:33:42,092 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:33:42,092 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:33:42,093 INFO    MainThread:3047 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1754733
-  total_bytes: 1754733
-}
-
-2022-04-16 01:33:42,194 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:33:42,195 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:33:42,196 INFO    MainThread:3047 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1754733
-  total_bytes: 1754733
-}
-
-2022-04-16 01:33:42,277 INFO    Thread-29 :3047 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/requirements.txt
-2022-04-16 01:33:42,283 INFO    Thread-30 :3047 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:33:42,286 INFO    Thread-31 :3047 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/conda-environment.yaml
-2022-04-16 01:33:42,297 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:33:42,298 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:33:42,299 INFO    MainThread:3047 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1754733
-  total_bytes: 1754733
-}
-
-2022-04-16 01:33:42,351 INFO    Thread-32 :3047 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:33:42,365 INFO    Thread-33 :3047 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/config.yaml
-2022-04-16 01:33:42,401 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:33:42,401 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:33:42,403 INFO    MainThread:3047 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1754733
-  total_bytes: 1754733
-}
-
-2022-04-16 01:33:42,504 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:33:42,505 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:33:42,506 INFO    MainThread:3047 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1754733
-  total_bytes: 1754733
-}
-
-2022-04-16 01:33:42,608 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:33:42,608 DEBUG   SenderThread:3047 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:33:42,609 INFO    SenderThread:3047 [file_pusher.py:join():181] waiting for file pusher
-2022-04-16 01:33:42,610 INFO    MainThread:3047 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
-exit_result {
-}
-file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1754733
-  total_bytes: 1754733
-}
-
-2022-04-16 01:33:42,611 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: get_summary
-2022-04-16 01:33:42,613 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: sampled_history
-2022-04-16 01:33:42,616 DEBUG   HandlerThread:3047 [handler.py:handle_request():124] handle_request: shutdown
-2022-04-16 01:33:42,617 INFO    HandlerThread:3047 [handler.py:finish():638] shutting down handler
-2022-04-16 01:33:43,478 INFO    WriterThread:3047 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/run-2m8v6ch7.wandb
-2022-04-16 01:33:43,609 INFO    SenderThread:3047 [sender.py:finish():933] shutting down sender
-2022-04-16 01:33:43,610 INFO    SenderThread:3047 [file_pusher.py:finish():176] shutting down file pusher
-2022-04-16 01:33:43,610 INFO    SenderThread:3047 [file_pusher.py:join():181] waiting for file pusher
-2022-04-16 01:33:43,634 INFO    MainThread:3047 [wandb_run.py:_show_summary():1785] rendering summary
-2022-04-16 01:33:43,635 INFO    MainThread:3047 [wandb_run.py:_show_history():1823] rendering history
-2022-04-16 01:33:43,635 INFO    MainThread:3047 [wandb_run.py:_show_files():1852] logging synced files
-2022-04-16 01:33:43,661 INFO    MainThread:3047 [internal.py:handle_exit():78] Internal process exited
diff --git a/wandb/run-20220416_013009-2m8v6ch7/logs/debug.log b/wandb/run-20220416_013009-2m8v6ch7/logs/debug.log
deleted file mode 100644
index 329a7e5..0000000
--- a/wandb/run-20220416_013009-2m8v6ch7/logs/debug.log
+++ /dev/null
@@ -1,69 +0,0 @@
-2022-04-16 01:30:09,150 INFO    MainThread:3047 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
-2022-04-16 01:30:09,150 INFO    MainThread:3047 [wandb_setup.py:_flush():69] setting login settings: {}
-2022-04-16 01:30:09,150 INFO    MainThread:3047 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/logs/debug.log
-2022-04-16 01:30:09,150 INFO    MainThread:3047 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/logs/debug-internal.log
-2022-04-16 01:30:09,150 INFO    MainThread:3047 [wandb_init.py:init():369] calling init triggers
-2022-04-16 01:30:09,150 INFO    MainThread:3047 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
-config: {'workers': 4, 'epochs': 5, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-16 01:30:09,150 INFO    MainThread:3047 [wandb_init.py:init():418] starting backend
-2022-04-16 01:30:09,155 INFO    MainThread:3047 [backend.py:ensure_launched():132] starting backend process...
-2022-04-16 01:30:09,156 INFO    wandb_internal:3047 [internal.py:wandb_internal():91] W&B internal server running at pid: 3047, started at: 2022-04-16 01:30:09.155690
-2022-04-16 01:30:09,157 INFO    MainThread:3047 [backend.py:ensure_launched():137] started backend process with pid: 0
-2022-04-16 01:30:09,158 INFO    MainThread:3047 [wandb_init.py:init():423] backend started and connected
-2022-04-16 01:30:09,159 INFO    MainThread:3047 [wandb_init.py:init():465] updated telemetry
-2022-04-16 01:30:09,160 INFO    MainThread:3047 [wandb_init.py:init():484] communicating current version
-2022-04-16 01:30:09,160 INFO    WriterThread:3047 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/run-2m8v6ch7.wandb
-2022-04-16 01:30:09,594 INFO    MainThread:3047 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-16 01:30:09,595 INFO    MainThread:3047 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-16 01:30:10,393 INFO    MainThread:3047 [wandb_init.py:init():522] starting run threads in backend
-2022-04-16 01:30:10,394 INFO    SenderThread:3047 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files
-2022-04-16 01:30:10,394 INFO    SenderThread:3047 [sender.py:_start_run_threads():707] run started: 2m8v6ch7 with start time 1650052809
-2022-04-16 01:30:10,394 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:30:11,399 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/diff.patch
-2022-04-16 01:30:11,400 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/requirements.txt
-2022-04-16 01:30:11,401 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/conda-environment.yaml
-2022-04-16 01:30:11,402 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/code/train_translation.py
-2022-04-16 01:30:11,402 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:30:11,402 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/code
-2022-04-16 01:30:13,396 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/conda-environment.yaml
-2022-04-16 01:30:13,406 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-16 01:30:13,407 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-16 01:30:13,407 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-16 01:30:13,412 INFO    MainThread:3047 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-16 01:30:13,415 INFO    MainThread:3047 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-16 01:30:13,417 INFO    MainThread:3047 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-16 01:30:13,418 INFO    MainThread:3047 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-16 01:30:13,418 INFO    MainThread:3047 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-16 01:30:13,418 INFO    MainThread:3047 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-16 01:30:14,398 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-metadata.json
-2022-04-16 01:30:14,398 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:30:15,121 INFO    Thread-14 :3047 [upload_job.py:push():133] Uploaded file /tmp/tmp_xxxs0wowandb/1u7lv5wr-wandb-metadata.json
-2022-04-16 01:30:15,209 INFO    Thread-17 :3047 [upload_job.py:push():133] Uploaded file /tmp/tmp_xxxs0wowandb/4wbr9a95-code/train_translation.py
-2022-04-16 01:30:16,138 INFO    Thread-22 :3047 [upload_job.py:push():133] Uploaded file /tmp/tmp_xxxs0wowandb/1f5szweq-diff.patch
-2022-04-16 01:30:16,398 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:30:16,398 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/config.yaml
-2022-04-16 01:30:18,399 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:30:25,465 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:30:27,470 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:30:27,660 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:30:28,591 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:30:43,595 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:31:24,720 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:31:25,608 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:31:31,029 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:31:31,609 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:31:31,610 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:31:43,612 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:32:17,466 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:32:17,622 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:32:17,622 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:32:33,638 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:33:05,052 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:33:05,647 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:33:05,647 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:33:19,651 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:33:23,662 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/output.log
-2022-04-16 01:33:24,092 INFO    SenderThread:3047 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:33:24,662 INFO    Thread-11 :3047 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013009-2m8v6ch7/files/wandb-summary.json
-2022-04-16 01:33:38,348 INFO    MainThread:3047 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2m8v6ch7
diff --git a/wandb/run-20220416_013009-2m8v6ch7/run-2m8v6ch7.wandb b/wandb/run-20220416_013009-2m8v6ch7/run-2m8v6ch7.wandb
deleted file mode 100644
index 4cd4d16..0000000
Binary files a/wandb/run-20220416_013009-2m8v6ch7/run-2m8v6ch7.wandb and /dev/null differ
diff --git a/wandb/run-20220416_013544-2rw6cucs/files/code/train_translation.py b/wandb/run-20220416_013544-2rw6cucs/files/code/train_translation.py
deleted file mode 100644
index ecaff5f..0000000
--- a/wandb/run-20220416_013544-2rw6cucs/files/code/train_translation.py
+++ /dev/null
@@ -1,402 +0,0 @@
-import numpy as np
-from pathlib import Path
-import argparse
-import json
-import math
-import os
-import random
-import signal
-import subprocess
-import sys
-import time
-
-import torch
-from torch import nn, optim 
-from torch.nn import Transformer 
-import torchtext
-import t_dataset
-from t_dataset import  Translation_dataset_t
-from t_dataset import  MyCollate
-import translation_utils 
-from translation_utils import TokenEmbedding, PositionalEncoding 
-from translation_utils import create_mask
-from transformers import BertModel 
-from transformers import AutoTokenizer
-from torch import Tensor
-from torchtext.data.metrics import bleu_score
-from models import Translator
-from models import BarlowTwins
-
-import wandb 
-
-
-#import barlow
-os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
-os.environ['WANDB_START_METHOD'] = 'thread'
-os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-
-MANUAL_SEED = 4444
-
-random.seed(MANUAL_SEED)
-np.random.seed(MANUAL_SEED)
-torch.manual_seed(MANUAL_SEED)
-torch.backends.cudnn.deterministic = True
-
-
-parser = argparse.ArgumentParser(description = 'Translation') 
-
-# Training hyper-parameters: 
-parser.add_argument('--workers', default=4, type=int, metavar='N', 
-                    help='number of data loader workers') 
-parser.add_argument('--epochs', default=10, type=int, metavar='N',
-                    help='number of total epochs to run')
-parser.add_argument('--batch_size', default=16, type=int, metavar='n',
-                    help='mini-batch size')
-parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
-                    help='base learning rate')
-parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
-                    help='dropout for training translation transformer')
-parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
-                    help='weight decay')
-parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
-                    help='momentum for sgd')
-parser.add_argument('--clip', default=1, type=float, metavar='GC',
-                    help='Gradient Clipping')
-parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
-                    help='betas for Adam Optimizer')
-parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
-                    help='eps for Adam optimizer')
-parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
-                    help='loss function for translation')
-parser.add_argument('--optimizer', default='adam', type=str, metavar='OP',
-                    help='selecting optimizer')
-
-# Transformer parameters: 
-parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
-                    help='dimension of transformer encoder')
-parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                    help= 'number of heads in transformer') 
-parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                    help= 'dimension of feedforward layer in transformer encoder') 
-parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                   help='number of layers of transformer encoder') 
-parser.add_argument('--projector', default='768-256', type=str,
-                    metavar='MLP', help='projector MLP')
-
-# Tokenizer: 
-parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
-                metavar='T', help= 'tokenizer')
-parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
-                    help='Dimension of mbert output')
-# Paths: 
-parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
-                    metavar='DIR', help='path to checkpoint directory')
-
-# to load or barlow or not: 
-parser.add_argument('--load', default=0, type=int,
-                    metavar='DIR', help='to load barlow twins encoder or not')
-
-# calculate bleu: 
-parser.add_argument('--checkbleu', default=5 , type=int,
-                    metavar='BL', help='check bleu after these number of epochs')
-# train or test dataset
-parser.add_argument('--train', default=True , type=bool,
-                    metavar='T', help='selecting train set')
-
-parser.add_argument('--print_freq', default=5 , type=int,
-                    metavar='PF', help='frequency of printing and saving stats')
-
-parser.add_argument('--test_translation', default=0, type=int, 
-                    metavar='TT', help='testing translation_score')
-''' NOTE: 
-        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-'''
-
-args = parser.parse_args()
-# print(args.load)
-os.environ["TOKENIZERS_PARALLELISM"] = "true"
-
-def main(): 
-
-    # print("entered main")
-    args.ngpus_per_node = torch.cuda.device_count()
-    if 'SLURM_JOB_ID' in os.environ:
-        # single-node and multi-node distributed training on SLURM cluster
-        # requeue job on SLURM preemption
-        signal.signal(signal.SIGUSR1, handle_sigusr1)
-        signal.signal(signal.SIGTERM, handle_sigterm)
-        # find a common host name on all nodes
-        # assume scontrol returns hosts in the same order on all nodes
-        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
-        stdout = subprocess.check_output(cmd.split())
-        host_name = stdout.decode().splitlines()[0]
-        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
-        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
-        args.dist_url = f'tcp://{host_name}:58472'
-    else:
-        # single-node distributed training
-        args.rank = 0
-        args.dist_url = 'tcp://localhost:58472'
-        args.world_size = args.ngpus_per_node
-    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
-
-
-def main_worker(gpu, args):
-    
-    args.rank += gpu
-    torch.distributed.init_process_group(
-        backend='nccl', init_method=args.dist_url,
-        world_size=args.world_size, rank=args.rank)
-
-    if args.rank == 0:
-
-        wandb.init(config=args, project='translation_test')#############################################
-        wandb.config.update(args)
-        config = wandb.config
-    
-        # exit()
-        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
-        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
-        print(' '.join(sys.argv))
-        print(' '.join(sys.argv), file=stats_file)
-
-    torch.cuda.set_device(gpu)
-    torch.backends.cudnn.benchmark = True
-
-    dataset = Translation_dataset_t(train=args.train) 
-    src_vocab_size = dataset.de_vocab_size
-    trg_vocab_size = dataset.en_vocab_size
-    tokenizer = dataset.tokenizer  
-    pad_idx = tokenizer.pad_token_id
-    sos_idx = tokenizer.cls_token_id 
-    eos_idx = tokenizer.sep_token_id
-
-#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
-    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
-    # print(src_vocab_size, trg_vocab_size)
-    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
-    transformer = Transformer(d_model=args.dmodel, 
-                              nhead=args.nhead, 
-                              num_encoder_layers=args.nlayers, 
-                              num_decoder_layers = args.nlayers, 
-                              dim_feedforward=args.dfeedforward, 
-                              dropout=args.dropout)
-    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
-    # print(model.state_dict)
-#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
-
-    # args.load = False
-
-    if args.load == 1: 
-        # print(args.load)
-        # print('inside')
-        print('loading barlow model')
-        t_enc = model.transformer.encoder
-        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
-        ### note: lambd is just a placeholder
-        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
-                            map_location='cpu')
-        barlow.load_state_dict(ckpt['model'])
-        model.transformer.encoder = barlow.transformer_enc        
-        model.mbert = barlow.mbert
-    '''
-    to_do: 
-    if post_train: 
-        torch.load(model.states_dict)
-        model.transformer.encoder = model_barlow
-
-    '''
-#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
-
-    param_weights = []
-    param_biases = []
-    for param in model.parameters():
-        if param.ndim == 1:
-            param_biases.append(param)
-        else:
-            param_weights.append(param)
-    parameters = [{'params': param_weights}, {'params': param_biases}]
-    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
-
-###########################################################
-    if args.optimizer == 'adam':
-        optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
-    else: 
-        optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) 
-    
-    if args.loss_fn == 'cross_entropy': 
-        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
-##############################################################
-
-    start_epoch = 0 
-
-    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
-
-    assert args.batch_size % args.world_size == 0
-    per_device_batch_size = args.batch_size // args.world_size
-    id2bert_dict = dataset.id2bert_dict
-    ###############################
-    loader = torch.utils.data.DataLoader(
-         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-   
-    test_loader = torch.utils.data.DataLoader(
-         dataset, batch_size=1, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-    #############################
-    start_time = time.time()
-
-
-    if not args.test_translation: 
-
-        for epoch in range(start_epoch, args.epochs):
-            sampler.set_epoch(epoch)
-            epoch_loss = 0 
-            t = 0 
-            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
-                src = sent[0].cuda(gpu, non_blocking=True)
-                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
-                tgt_out = sent[3].cuda(gpu, non_blocking=True)
-                
-                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
-                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
-                
-                optimizer.zero_grad()
-
-                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
-                loss.backward()
-
-                optimizer.step()
-                # losses += loss.item()
-                
-#                wandb.log({'iter_loss': loss})
-                epoch_loss += loss.item()
-                t += 1 
-                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-                
-                if step % args.print_freq == 0:
-                    if args.rank == 0:
-                        stats = dict(epoch=epoch, step=step,
-                                    loss=loss.item(),
-                                    time=int(time.time() - start_time))
-                        print(json.dumps(stats))
-                        print(json.dumps(stats), file=stats_file)
-            if args.rank == 0:
-
-                wandb.log({"epoch_loss":epoch_loss/t})
-                # save checkpoint
-                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
-                            optimizer=optimizer.state_dict())
-                # print(model.state_dict)
-                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
-                print('translation model saved in', args.checkpoint_dir)
-            
-    ##############################################################
-            if args.rank == 0: 
-                if epoch%args.checkbleu ==0 : 
-
-                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                    wandb.log({'bleu_score': bleu_score}) 
-    #            print(bleu_score(predicted, target))
-    ##############################################################
-    #        if epoch%1 ==0 : 
-    #            torch.save(model.module.state_dict(),
-    #                   'path.pth')
-    #            print("Model is saved")
-            # if args.rank == 0:
-            #     # save checkpoint
-            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
-            #                  optimizer=optimizer.state_dict())
-            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
-            #     print('saved translation model in', args.checkpoint_dir)
-        wandb.finish()
-            
-    else: 
-
-        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-        print('test_bleu_score', bleu_score)
-        if args.rank == 0: 
-            wandb.log({'bleu_score': bleu_score})
-
-
-def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
-
-    model.eval()
-    predicted=[]
-    target=[]
-            
-    for i in test_loader: 
-        src = i[0].cuda(gpu, non_blocking=True)
-#        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-        tgt_out = i[3].cuda(gpu, non_blocking=True)
-        num_tokens = src.shape[0]
-
-        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
-        out = translate(model, src, tokenizer, src_mask, id2bert_dict, gpu)
-        predicted.append(out)
-        for i in range(len(tgt_out)): 
-            tgt_out[i] = id2bert_dict[tgt_out[i].item()]
-        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-
-                
-        try: 
-            bleu_score(predicted, target)
-        except: 
-            predicted.pop()
-            target.pop()
-            
-        bleu = bleu_score(predicted, target)
-
-    return bleu
-
-'''
-todo: 
-    BLEU score
-'''
-
-# function to generate output sequence using greedy algorithm 
-def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
-    src = src
-    src_mask = src_mask
-
-    memory = model.module.encode(src, src_mask)
-    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
-    for i in range(max_len-1):
-        memory = memory
-        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
-                    .type(torch.bool)).cuda(gpu, non_blocking=True)
-        out = model.module.decode(ys, memory, tgt_mask)
-        out = out.transpose(0, 1)
-        prob = model.module.generator(out[:, -1])
-        _, next_word = torch.max(prob, dim=1)
-        next_word = next_word.item()
-
-        ys = torch.cat([ys,
-                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
-        if next_word == eos_idx:
-            break
-    return ys
-
-
-# actual function to translate input sentence into target language
-def translate(model: torch.nn.Module, 
-        src: torch.tensor, 
-        tokenizer,src_mask, id2bert_dict, gpu):
-    model.eval()
-    
-    num_tokens = src.shape[0]
-    
-    
-    tgt_tokens = greedy_decode(
-        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-    
-    for i in range(len(tgt_tokens)): 
-        tgt_tokens[i] = id2bert_dict[tgt_tokens[i].item()]
-#    print(tgt_tokens)
-
-    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
-
-
-if __name__ == '__main__': 
-    main()
-    wandb.finish()
diff --git a/wandb/run-20220416_013544-2rw6cucs/files/conda-environment.yaml b/wandb/run-20220416_013544-2rw6cucs/files/conda-environment.yaml
deleted file mode 100644
index fd74d2b..0000000
--- a/wandb/run-20220416_013544-2rw6cucs/files/conda-environment.yaml
+++ /dev/null
@@ -1,158 +0,0 @@
-name: ectc
-channels:
-  - pytorch
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=4.5=1_gnu
-  - blas=1.0=mkl
-  - brotlipy=0.7.0=py37h27cfd23_1003
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2022.3.18=h06a4308_0
-  - certifi=2021.10.8=py37h06a4308_2
-  - cffi=1.15.0=py37hd667e15_1
-  - cryptography=36.0.0=py37h9ce1e76_0
-  - cudatoolkit=11.3.1=h2bc3f7f_2
-  - ffmpeg=4.3=hf484d3e_0
-  - freetype=2.11.0=h70c0345_0
-  - giflib=5.2.1=h7b6447c_0
-  - gmp=6.2.1=h2531618_2
-  - gnutls=3.6.15=he1e5248_0
-  - idna=3.3=pyhd3eb1b0_0
-  - intel-openmp=2021.4.0=h06a4308_3561
-  - jpeg=9d=h7f8727e_0
-  - lame=3.100=h7b6447c_0
-  - lcms2=2.12=h3be6417_0
-  - ld_impl_linux-64=2.35.1=h7274673_9
-  - libffi=3.3=he6710b0_2
-  - libgcc-ng=9.3.0=h5101ec6_17
-  - libgomp=9.3.0=h5101ec6_17
-  - libiconv=1.15=h63c8f33_5
-  - libidn2=2.3.2=h7f8727e_0
-  - libpng=1.6.37=hbc83047_0
-  - libstdcxx-ng=9.3.0=hd4cf53a_17
-  - libtasn1=4.16.0=h27cfd23_0
-  - libtiff=4.2.0=h85742a9_0
-  - libunistring=0.9.10=h27cfd23_0
-  - libuv=1.40.0=h7b6447c_0
-  - libwebp=1.2.2=h55f646e_0
-  - libwebp-base=1.2.2=h7f8727e_0
-  - lz4-c=1.9.3=h295c915_1
-  - mkl=2021.4.0=h06a4308_640
-  - mkl-service=2.4.0=py37h7f8727e_0
-  - mkl_fft=1.3.1=py37hd3c417c_0
-  - mkl_random=1.2.2=py37h51133e4_0
-  - ncurses=6.3=h7f8727e_2
-  - nettle=3.7.3=hbbd107a_1
-  - numpy-base=1.21.2=py37h79a1101_0
-  - openh264=2.1.1=h4ff587b_0
-  - openssl=1.1.1n=h7f8727e_0
-  - pip=21.2.2=py37h06a4308_0
-  - pycparser=2.21=pyhd3eb1b0_0
-  - pyopenssl=22.0.0=pyhd3eb1b0_0
-  - pysocks=1.7.1=py37_1
-  - python=3.7.11=h12debd9_0
-  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
-  - pytorch-mutex=1.0=cuda
-  - readline=8.1.2=h7f8727e_1
-  - requests=2.27.1=pyhd3eb1b0_0
-  - setuptools=58.0.4=py37h06a4308_0
-  - six=1.16.0=pyhd3eb1b0_1
-  - sqlite=3.38.0=hc218d9a_0
-  - tk=8.6.11=h1ccaba5_0
-  - torchaudio=0.11.0=py37_cu113
-  - typing_extensions=4.1.1=pyh06a4308_0
-  - wheel=0.37.1=pyhd3eb1b0_0
-  - xz=5.2.5=h7b6447c_0
-  - zlib=1.2.11=h7f8727e_4
-  - zstd=1.4.9=haebb681_0
-  - pip:
-    - aiohttp==3.8.1
-    - aiosignal==1.2.0
-    - antlr4-python3-runtime==4.8
-    - async-timeout==4.0.2
-    - asynctest==0.13.0
-    - attrs==21.4.0
-    - backcall==0.2.0
-    - bitarray==2.4.1
-    - blessings==1.7
-    - charset-normalizer==2.0.12
-    - click==8.0.4
-    - colorama==0.4.4
-    - configparser==5.2.0
-    - cython==0.29.28
-    - datasets==1.16.1
-    - debugpy==1.6.0
-    - decorator==5.1.1
-    - dill==0.3.4
-    - docker-pycreds==0.4.0
-    - entrypoints==0.4
-    - fastbpe==0.1.0
-    - filelock==3.6.0
-    - frozenlist==1.3.0
-    - fsspec==2022.2.0
-    - gitdb==4.0.9
-    - gitpython==3.1.27
-    - gpustat==0.6.0
-    - huggingface-hub==0.4.0
-    - hydra-core==1.0.7
-    - importlib-metadata==4.11.3
-    - importlib-resources==5.6.0
-    - ipykernel==6.12.1
-    - ipython==7.32.0
-    - jedi==0.18.1
-    - joblib==1.1.0
-    - jupyter-client==7.2.2
-    - jupyter-core==4.9.2
-    - matplotlib-inline==0.1.3
-    - mock==4.0.3
-    - multidict==6.0.2
-    - multiprocess==0.70.12.2
-    - nest-asyncio==1.5.5
-    - numpy==1.21.5
-    - nvidia-ml-py3==7.352.0
-    - omegaconf==2.0.6
-    - packaging==21.3
-    - pandas==1.3.5
-    - parso==0.8.3
-    - pathtools==0.1.2
-    - pexpect==4.8.0
-    - pickleshare==0.7.5
-    - pillow==9.0.1
-    - portalocker==2.4.0
-    - promise==2.3
-    - prompt-toolkit==3.0.29
-    - protobuf==3.19.4
-    - psutil==5.9.0
-    - ptyprocess==0.7.0
-    - pyarrow==7.0.0
-    - pygments==2.11.2
-    - pyparsing==3.0.7
-    - python-dateutil==2.8.2
-    - pytz==2022.1
-    - pyyaml==6.0
-    - pyzmq==22.3.0
-    - regex==2022.3.15
-    - sacrebleu==2.0.0
-    - sacremoses==0.0.49
-    - sentry-sdk==1.5.8
-    - shortuuid==1.0.8
-    - smmap==5.0.0
-    - subprocess32==3.5.4
-    - subword-nmt==0.3.8
-    - tabulate==0.8.9
-    - tokenizers==0.10.3
-    - torch==1.11.0
-    - torchtext==0.12.0
-    - torchvision==0.9.1
-    - tornado==6.1
-    - tqdm==4.63.1
-    - traitlets==5.1.1
-    - transformers==4.14.1
-    - urllib3==1.26.9
-    - wandb==0.10.31
-    - wcwidth==0.2.5
-    - xxhash==3.0.0
-    - yarl==1.7.2
-    - zipp==3.7.0
-prefix: /home/ivlabs/miniconda3/envs/ectc
diff --git a/wandb/run-20220416_013544-2rw6cucs/files/config.yaml b/wandb/run-20220416_013544-2rw6cucs/files/config.yaml
deleted file mode 100644
index d0bb2ba..0000000
--- a/wandb/run-20220416_013544-2rw6cucs/files/config.yaml
+++ /dev/null
@@ -1,115 +0,0 @@
-wandb_version: 1
-
-_wandb:
-  desc: null
-  value:
-    cli_version: 0.10.31
-    code_path: code/train_translation.py
-    framework: huggingface
-    huggingface_version: 4.14.1
-    is_jupyter_run: false
-    is_kaggle_kernel: false
-    python_version: 3.7.11
-    t:
-      1:
-      - 1
-      - 11
-      2:
-      - 1
-      - 11
-      3:
-      - 2
-      4: 3.7.11
-      5: 0.10.31
-      6: 4.14.1
-      8:
-      - 8
-batch_size:
-  desc: null
-  value: 16
-betas:
-  desc: null
-  value:
-  - 0.9
-  - 0.98
-checkbleu:
-  desc: null
-  value: 5
-checkpoint_dir:
-  desc: null
-  value: checkpoint
-clip:
-  desc: null
-  value: 1
-dfeedforward:
-  desc: null
-  value: 200
-dist_url:
-  desc: null
-  value: tcp://localhost:58472
-dmodel:
-  desc: null
-  value: 768
-dropout:
-  desc: null
-  value: 0.01
-epochs:
-  desc: null
-  value: 10
-eps:
-  desc: null
-  value: 1.0e-09
-learning_rate:
-  desc: null
-  value: 0.2
-load:
-  desc: null
-  value: 0
-loss_fn:
-  desc: null
-  value: cross_entropy
-mbert_out_size:
-  desc: null
-  value: 768
-momentum:
-  desc: null
-  value: 0.9
-ngpus_per_node:
-  desc: null
-  value: 2
-nhead:
-  desc: null
-  value: 4
-nlayers:
-  desc: null
-  value: 3
-optimizer:
-  desc: null
-  value: adam
-print_freq:
-  desc: null
-  value: 5
-projector:
-  desc: null
-  value: 768-256
-rank:
-  desc: null
-  value: 0
-test_translation:
-  desc: null
-  value: 0
-tokenizer:
-  desc: null
-  value: bert-base-multilingual-uncased
-train:
-  desc: null
-  value: true
-weight_decay:
-  desc: null
-  value: 1.0e-06
-workers:
-  desc: null
-  value: 4
-world_size:
-  desc: null
-  value: 2
diff --git a/wandb/run-20220416_013544-2rw6cucs/files/diff.patch b/wandb/run-20220416_013544-2rw6cucs/files/diff.patch
deleted file mode 100644
index 569fe58..0000000
--- a/wandb/run-20220416_013544-2rw6cucs/files/diff.patch
+++ /dev/null
@@ -1,30779 +0,0 @@
-diff --git a/__pycache__/barlow_utils.cpython-37.pyc b/__pycache__/barlow_utils.cpython-37.pyc
-index 3c0d4fe..b13b62f 100644
-Binary files a/__pycache__/barlow_utils.cpython-37.pyc and b/__pycache__/barlow_utils.cpython-37.pyc differ
-diff --git a/__pycache__/models.cpython-37.pyc b/__pycache__/models.cpython-37.pyc
-index 3bbb9de..acc1737 100644
-Binary files a/__pycache__/models.cpython-37.pyc and b/__pycache__/models.cpython-37.pyc differ
-diff --git a/__pycache__/t_dataset.cpython-37.pyc b/__pycache__/t_dataset.cpython-37.pyc
-index 2650733..c4b566b 100644
-Binary files a/__pycache__/t_dataset.cpython-37.pyc and b/__pycache__/t_dataset.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-37.pyc b/__pycache__/translation_utils.cpython-37.pyc
-index 60c9eda..12c22a5 100644
-Binary files a/__pycache__/translation_utils.cpython-37.pyc and b/__pycache__/translation_utils.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-38.pyc b/__pycache__/translation_utils.cpython-38.pyc
-index 061d0e7..a1e7877 100644
-Binary files a/__pycache__/translation_utils.cpython-38.pyc and b/__pycache__/translation_utils.cpython-38.pyc differ
-diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
-index 884dd9c..03d7a9b 100644
---- a/checkpoint/stats.txt
-+++ b/checkpoint/stats.txt
-@@ -833,3 +833,173 @@ train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 -
- {"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
- {"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
- {"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 4}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 5}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 5}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 6}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 7}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 7}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 8}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 8}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 9}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 8}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 65}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 178}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 15}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 72}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 128}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 183}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 239}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 295}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 351}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 407}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 463}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 19}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 104}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 188}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 355}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 606}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 690}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.121065616607666, "time": 9}
-+{"epoch": 0, "step": 5, "loss": 97.44178771972656, "time": 10}
-+{"epoch": 0, "step": 10, "loss": 168.33328247070312, "time": 12}
-+{"epoch": 0, "step": 15, "loss": 133.17933654785156, "time": 12}
-+{"epoch": 0, "step": 20, "loss": 112.3768539428711, "time": 13}
-+{"epoch": 0, "step": 25, "loss": 120.29653930664062, "time": 14}
-+{"epoch": 0, "step": 30, "loss": 119.97941589355469, "time": 15}
-+{"epoch": 0, "step": 35, "loss": 86.40515899658203, "time": 16}
-+{"epoch": 0, "step": 40, "loss": 70.5906982421875, "time": 17}
-+train_translation.py
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 28}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 155}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 281}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 405}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 530}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 657}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 783}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 908}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 1033}
-+train_translation.py
-+train_translation.py
-+train_translation.py
-+train_translation.py --load=1
-+train_translation.py --load=1
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 9}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 65}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 178}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 9}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 66}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 179}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 16}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 72}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 128}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 184}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 240}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 296}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 352}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 408}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 464}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 20}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 273}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 356}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 440}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 524}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 608}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 692}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 20}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 356}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 607}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 691}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 20}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 188}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 356}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 607}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 690}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 21}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 273}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 357}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 440}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 524}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 608}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 691}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 21}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 106}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 273}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 357}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 441}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 524}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 608}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 691}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.128603458404541, "time": 19}
-+{"epoch": 0, "step": 5, "loss": 156.04449462890625, "time": 104}
-+{"epoch": 0, "step": 10, "loss": 154.7353515625, "time": 188}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.128603458404541, "time": 5}
-+{"epoch": 0, "step": 5, "loss": 156.04449462890625, "time": 6}
-+{"epoch": 0, "step": 10, "loss": 154.7353515625, "time": 7}
-+{"epoch": 1, "step": 15, "loss": 138.67442321777344, "time": 70}
-+{"epoch": 1, "step": 20, "loss": 75.6456298828125, "time": 70}
-+{"epoch": 2, "step": 25, "loss": 64.19247436523438, "time": 116}
-+{"epoch": 2, "step": 30, "loss": 65.62056732177734, "time": 116}
-+{"epoch": 2, "step": 35, "loss": 66.36638641357422, "time": 117}
-+{"epoch": 3, "step": 40, "loss": 77.29269409179688, "time": 164}
-+{"epoch": 3, "step": 45, "loss": 68.74011993408203, "time": 165}
-+{"epoch": 4, "step": 50, "loss": 74.82659912109375, "time": 182}
-+{"epoch": 4, "step": 55, "loss": 77.39452362060547, "time": 183}
-diff --git a/t_dataset.py b/t_dataset.py
-index c7ab181..53d5caa 100644
---- a/t_dataset.py
-+++ b/t_dataset.py
-@@ -20,19 +20,19 @@ class Translation_dataset_t(Dataset):
-             split = "train" 
-         else: 
-             split = "test" 
--        self.dataset = load_dataset('wmt14', "de-en", split=split) 
-+        self.dataset = load_dataset('opus_rf', "de-en", split=split) 
-         self.de_list = []
-         self.en_list = []
- #        self.tokenizer = tokenizer
-         self.tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-uncased')
--        dataset = load_dataset('opus_rf', 'de-en', split='train')
-         en_list_2 = []
--        for n, i in enumerate(dataset): 
-+        for n, i in enumerate(self.dataset): 
-             en_list_2.append(i['translation']['en'].lower())
- 
-         a1 = list(self.tokenizer(en_list_2, padding=True, return_tensors='pt')['input_ids'])
-         self.en_vocab, self.en_vocab_size = vocab(a1)
-         self.bert2id_dict = translation_utils.bert2id(self.en_vocab)
-+        self.id2bert_dict = translation_utils.id2bert(self.en_vocab)
-         
-         for i in self.dataset: 
-             self.de_list.append(self.tokenizer(i['translation']['de'].lower(), 
-diff --git a/train_translation.py b/train_translation.py
-index eea074a..ecaff5f 100644
---- a/train_translation.py
-+++ b/train_translation.py
-@@ -33,6 +33,7 @@ import wandb
- #import barlow
- os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
- os.environ['WANDB_START_METHOD'] = 'thread'
-+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
- 
- MANUAL_SEED = 4444
- 
-@@ -47,9 +48,9 @@ parser = argparse.ArgumentParser(description = 'Translation')
- # Training hyper-parameters: 
- parser.add_argument('--workers', default=4, type=int, metavar='N', 
-                     help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
-+parser.add_argument('--epochs', default=10, type=int, metavar='N',
-                     help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
-+parser.add_argument('--batch_size', default=16, type=int, metavar='n',
-                     help='mini-batch size')
- parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
-                     help='base learning rate')
-@@ -75,9 +76,9 @@ parser.add_argument('--dmodel', default=768, type=int, metavar='T',
-                     help='dimension of transformer encoder')
- parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                     help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=500, type=int, metavar='F', 
-+parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                     help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=8, type=int, metavar= 'N', 
-+parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                    help='number of layers of transformer encoder') 
- parser.add_argument('--projector', default='768-256', type=str,
-                     metavar='MLP', help='projector MLP')
-@@ -233,6 +234,7 @@ def main_worker(gpu, args):
- 
-     assert args.batch_size % args.world_size == 0
-     per_device_batch_size = args.batch_size // args.world_size
-+    id2bert_dict = dataset.id2bert_dict
-     ###############################
-     loader = torch.utils.data.DataLoader(
-          dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-@@ -267,7 +269,7 @@ def main_worker(gpu, args):
-                 optimizer.step()
-                 # losses += loss.item()
-                 
--                # wandb.log({'iter_loss': loss})
-+#                wandb.log({'iter_loss': loss})
-                 epoch_loss += loss.item()
-                 t += 1 
-                 torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-@@ -293,7 +295,7 @@ def main_worker(gpu, args):
-             if args.rank == 0: 
-                 if epoch%args.checkbleu ==0 : 
- 
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
-+                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                     wandb.log({'bleu_score': bleu_score}) 
-     #            print(bleu_score(predicted, target))
-     ##############################################################
-@@ -311,13 +313,13 @@ def main_worker(gpu, args):
-             
-     else: 
- 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
-+        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-         print('test_bleu_score', bleu_score)
-         if args.rank == 0: 
-             wandb.log({'bleu_score': bleu_score})
- 
- 
--def checkbleu(model, tokenizer, test_loader, gpu): 
-+def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
- 
-     model.eval()
-     predicted=[]
-@@ -325,13 +327,17 @@ def checkbleu(model, tokenizer, test_loader, gpu):
-             
-     for i in test_loader: 
-         src = i[0].cuda(gpu, non_blocking=True)
-+#        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-         tgt_out = i[3].cuda(gpu, non_blocking=True)
-         num_tokens = src.shape[0]
- 
-         src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
-+        out = translate(model, src, tokenizer, src_mask, id2bert_dict, gpu)
-         predicted.append(out)
-+        for i in range(len(tgt_out)): 
-+            tgt_out[i] = id2bert_dict[tgt_out[i].item()]
-         target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-+
-                 
-         try: 
-             bleu_score(predicted, target)
-@@ -375,7 +381,7 @@ def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
- # actual function to translate input sentence into target language
- def translate(model: torch.nn.Module, 
-         src: torch.tensor, 
--        tokenizer,src_mask, gpu):
-+        tokenizer,src_mask, id2bert_dict, gpu):
-     model.eval()
-     
-     num_tokens = src.shape[0]
-@@ -383,6 +389,11 @@ def translate(model: torch.nn.Module,
-     
-     tgt_tokens = greedy_decode(
-         model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-+    
-+    for i in range(len(tgt_tokens)): 
-+        tgt_tokens[i] = id2bert_dict[tgt_tokens[i].item()]
-+#    print(tgt_tokens)
-+
-     return tokenizer.convert_ids_to_tokens(tgt_tokens) 
- 
- 
-diff --git a/translation_dataset.py b/translation_dataset.py
-index 274c2f3..82270c6 100644
---- a/translation_dataset.py
-+++ b/translation_dataset.py
-@@ -11,7 +11,7 @@ class Translation_dataset(Dataset):
-     
-     def __init__(self):
-       
--        self.dataset = load_dataset('wmt14', "de-en", split="train") 
-+        self.dataset = load_dataset('opus_rf', "de-en", split="train") 
-         self.de_list = []
-         self.en_list = []
- 
-diff --git a/translation_utils.py b/translation_utils.py
-index 6c66f53..4b3b830 100644
---- a/translation_utils.py
-+++ b/translation_utils.py
-@@ -31,6 +31,13 @@ def bert2id(de_list: set):
-     
-     return label_dict
- 
-+def id2bert(de_list: set): 
-+    label_dict = {}
-+    for n, i in enumerate(de_list): 
-+        label_dict[n] = i
-+    
-+    return label_dict
-+
- def generate_square_subsequent_mask(sz):
-     mask = (torch.triu(torch.ones((sz, sz))) == 1).transpose(0, 1)
-     mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
-@@ -81,10 +88,10 @@ class TokenEmbedding(nn.Module):
-         super(TokenEmbedding, self).__init__()
-         # self.embedding = nn.Embedding(vocab_size, emb_size)
-         self.embedding = mbert
--#         for param in self.embedding.parameters():
--#             param.requires_grad = False
--#         for param in self.embedding.pooler.parameters():
--#             param.requires_grad = True
-+        for param in self.embedding.parameters():
-+            param.requires_grad = False
-+        for param in self.embedding.pooler.parameters():
-+            param.requires_grad = True
-         self.emb_size = emb_size
- 
-     def forward(self, tokens: torch.tensor):
-diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
-index 6163657..fdf4076 120000
---- a/wandb/debug-internal.log
-+++ b/wandb/debug-internal.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug-internal.log
-\ No newline at end of file
-+run-20220416_013544-2rw6cucs/logs/debug-internal.log
-\ No newline at end of file
-diff --git a/wandb/debug.log b/wandb/debug.log
-index 7d0f5dd..83d0ecb 120000
---- a/wandb/debug.log
-+++ b/wandb/debug.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug.log
-\ No newline at end of file
-+run-20220416_013544-2rw6cucs/logs/debug.log
-\ No newline at end of file
-diff --git a/wandb/latest-run b/wandb/latest-run
-index f11d588..923d2ad 120000
---- a/wandb/latest-run
-+++ b/wandb/latest-run
-@@ -1 +1 @@
--run-20220409_182749-paufev36
-\ No newline at end of file
-+run-20220416_013544-2rw6cucs
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py b/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-deleted file mode 100644
-index 9236ace..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-+++ /dev/null
-@@ -1,350 +0,0 @@
--# Copyright (c) Facebook, Inc. and its affiliates.
--# All rights reserved.
--#
--# This source code is licensed under the license found in the
--# LICENSE file in the root directory of this source tree.
--
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--from translation_dataset import Translation_dataset
--from translation_dataset import MyCollate
--from transformers import BertModel
--from transformers import AutoTokenizer
--from torch import nn, optim
--import torch
--from t_dataset import Translation_dataset_t
--from torch.nn import Transformer
--from models import BarlowTwins
--from models import Translator
--from barlow_utils import off_diagonal 
--import wandb 
--#from _config import Config 
--#config = Config.config
--
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--#setting random seeds
--SEED = 4444
--
--random.seed(SEED)
--np.random.seed(SEED)
--torch.manual_seed(SEED)
--torch.cuda.manual_seed(SEED)
--torch.backends.cudnn.deterministic = True
--
--
--
--
--parser = argparse.ArgumentParser(description='Barlow Twins Training')
--# parser.add_batch_sizeargument('data', type=Path, metavar='DIR',
--#                     help='path to dataset')
--
--
--
--# Training parameters: 
--parser.add_argument('--workers', default=20, type=int, metavar='N',
--                    help='number of data loader workers')
--parser.add_argument('--epochs', default=2, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=64, type=int, metavar='N',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate-weights', default=0.2, type=float, metavar='LR',
--                    help='base learning rate for weights')
--parser.add_argument('--learning-rate-biases', default=0.0048, type=float, metavar='LR',
--                 help='base learning rate for biases and batch norm parameters')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--lambd', default=0.0051, type=float, metavar='L',
--                    help='weight on off-diagonal terms')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--
--# Model parameters:
--parser.add_argument('--projector', default='768-768', type=str,
--                    metavar='MLP', help='projector MLP')
--parser.add_argument('--print-freq', default=100, type=int, metavar='N',
--                    help='print frequency')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=3, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--dropout', default=0.0051, type=float, metavar= 'D', 
--                   help='dropout in transformer') 
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-cased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint-dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--parser.add_argument('--load', default=1, type=int,
--                    metavar='LO', help='load weights from translation model')
--
--args = parser.parse_args()
--
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main():
--
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--        wandb.init(config=args)#############################################
--        # wandb.config.update(args)
--        config = wandb.config
--        # print(args.lambd, config.lambd)
--        # wandb.finish()
--        # exibatch_sizet()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=False)
--    t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    mbert = BertModel.from_pretrained(args.tokenizer)
--    model = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=args.lambd).cuda(gpu)
--    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--    optimizer = LARS(parameters, lr=0, weight_decay=args.weight_decay,
--                     weight_decay_filter=True,
--                     lars_adaptation_filter=True)
--    # optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
--
--    # automatically resume from checkpoint if it exists
--    # if (args.checkpoint_dir / 'checkpoint.pth').is_file():
--    #     ckpt = torch.load(args.checkpoint_dir / 'checkpoint.pth',
--    #                       map_location='cpu')
--    #     start_epoch = ckpt['epoch']
--    #     # print("model=",model)
--    #     # print("ckpt=",ckpt['model'])
--    #     model.load_state_dict(ckpt['model'])
--    #     optimizer.load_state_dict(ckpt['optimizer'])
--    # else:
--
--    trans_dataset = Translation_dataset_t(train=True)
--    src_vocab_size = trans_dataset.de_vocab_size 
--    tgt_vocab_size = trans_dataset.en_vocab_size
--    tokenizer = trans_dataset.tokenizer
--    transformer = Transformer(d_model=args.dmodel, 
--                                   nhead=args.nhead, 
--                                   num_encoder_layers=args.nlayers,
--                                   num_decoder_layers=args.nlayers, 
--                                   dim_feedforward=args.dfeedforward, 
--                                   dropout=args.dropout)
--    print(args.batch_size)
--    translation_model = Translator(mbert, 
--            transformer,
--            tgt_vocab_size=tgt_vocab_size,
--            emb_size=args.mbert_out_size)
--    
--    if args.load == 1 : 
--        print('loading translation model')
--        ckpt = torch.load(args.checkpoint_dir / 'translation_checkpoint.pth') #,map_location='cpu')
--        translation_model.load_state_dict(ckpt['model'])
--        model.transformer_enc = translation_model.transformer.encoder
--        model.mbert = translation_model.tok_emb.embedding
--        
--    start_epoch = 0
--
--
--    ################################
--    # dataset = torchvision.datasets.ImageFolder(args.data / 'train', Transform())
--    # sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--    ###############################
--
--    dataset = Translation_dataset()
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    #############################
--    start_time = time.time()
--    scaler = torch.cuda.amp.GradScaler()
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            y1 = sent[0].cuda(gpu, non_blocking=True)
--            y2 = sent[1].cuda(gpu, non_blocking=True)
--            adjust_learning_rate(args, optimizer, loader, step)
--            optimizer.zero_grad()
--            with torch.cuda.amp.autocast(): 
--                _, loss = model.forward(y1, y2)
--                wandb.log({'iter_loss':loss})
--#               print(loss.item())
--                epoch_loss += loss.item()
--            scaler.scale(loss).backward()
--            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
--            scaler.step(optimizer)
--            scaler.update()
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 lr_weights=optimizer.param_groups[0]['lr'],
--                                 lr_biases=optimizer.param_groups[1]['lr'],
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.state_dict(),
--                         optimizer=optimizer.state_dict())
--            torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--            print('barlow model saved in', args.checkpoint_dir)
--            for sent in test_loader: 
--                y1 = sent[0].cuda(gpu, non_blocking=True)
--                y2 = sent[1].cuda(gpu, non_blocking=True)
--                model.eval()
--                c, _ = model(y1, y2)
--                xlabels = tokenizer.convert_ids_to_tokens(y2)
--                ylabels = tokenizer.convert_ids_to_tokens(y1)
--    wandb.finish()
--#    if args.rank == 0:
--#        save final model
--#        torch.save(model.module.state_dict(),
--#                    args.checkpoint_dir / 'translation.pth')
--
--
--def adjust_learning_rate(args, optimizer, loader, step):
--    max_steps = args.epochs * len(loader)
--    warmup_steps = 10 * len(loader)
--    base_lr = args.batch_size / 256
--    if step < warmup_steps:
--        lr = base_lr * step / warmup_steps
--    else:
--        step -= warmup_steps
--        max_steps -= warmup_steps
--        q = 0.5 * (1 + math.cos(math.pi * step / max_steps))
--        end_lr = base_lr * 0.001
--        lr = base_lr * q + end_lr * (1 - q)
--    optimizer.param_groups[0]['lr'] = lr * args.learning_rate_weights
--    optimizer.param_groups[1]['lr'] = lr * args.learning_rate_biases
--
--
--def handle_sigusr1(signum, frame):
--    os.system(f'scontrol requeue {os.getenv("SLURM_JOB_ID")}')
--    exit()
--
--
--def handle_sigterm(signum, frame):
--    pass
--
--
--class LARS(optim.Optimizer):
--    def __init__(self, params, lr, weight_decay=0, momentum=0.9, eta=0.001,
--                 weight_decay_filter=False, lars_adaptation_filter=False):
--        defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum,
--                        eta=eta, weight_decay_filter=weight_decay_filter,
--                        lars_adaptation_filter=lars_adaptation_filter)
--        super().__init__(params, defaults)
--
--
--    def exclude_bias_and_norm(self, p):
--        return p.ndim == 1
--
--    @torch.no_grad()
--    def step(self):
--        for g in self.param_groups:
--            for p in g['params']:
--                dp = p.grad
--
--                if dp is None:
--                    continue
--
--                if not g['weight_decay_filter'] or not self.exclude_bias_and_norm(p):
--                    dp = dp.add(p, alpha=g['weight_decay'])
--
--                if not g['lars_adaptation_filter'] or not self.exclude_bias_and_norm(p):
--                    param_norm = torch.norm(p)
--                    update_norm = torch.norm(dp)
--                    one = torch.ones_like(param_norm)
--                    q = torch.where(param_norm > 0.,
--                                    torch.where(update_norm > 0,
--                                                (g['eta'] * param_norm / update_norm), one), one)
--                    dp = dp.mul(q)
--
--                param_state = self.state[p]
--                if 'mu' not in param_state:
--                    param_state['mu'] = torch.zeros_like(p)
--                mu = param_state['mu']
--                mu.mul_(g['momentum']).add_(dp)
--
--                p.add_(mu, alpha=-g['lr'])
--
--
--if __name__ == '__main__':
--    try:  
--      main()
--    except KeyboardInterrupt:
--      print('Interrupted')
--      wandb.finish()
--      try:
--          sys.exit(0)
--      except SystemExit:
--          os._exit(0)
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml b/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/config.yaml b/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-deleted file mode 100644
-index 147470d..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-+++ /dev/null
-@@ -1,90 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/barlow.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.0051
--epochs:
--  desc: null
--  value: 2
--lambd:
--  desc: null
--  value: 0.0051
--learning_rate_biases:
--  desc: null
--  value: 0.0048
--learning_rate_weights:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 3
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 100
--projector:
--  desc: null
--  value: 768-768
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-cased
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 20
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/output.log b/wandb/run-20220406_171518-s7zesus8/files/output.log
-deleted file mode 100644
-index 847ffbb..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/output.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--
--barlow.py --load 0
--Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Error in sys.excepthook:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 47, in getlines
--    return updatecache(filename, module_globals)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 136, in updatecache
--    with tokenize.open(fullname) as fp:
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/tokenize.py", line 447, in open
--    buffer = _builtin_open(filename, 'rb')
--KeyboardInterrupt
--Original exception was:
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt b/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-deleted file mode 100644
-index 5f93d29..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,21 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-06T11:45:20.215162",
--    "startedAt": "2022-04-06T11:45:18.613420",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_enhancement/barlow.py",
--    "codePath": "barlow.py",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log b/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-deleted file mode 100644
-index 0630656..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-+++ /dev/null
-@@ -1,91 +0,0 @@
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,622 DEBUG   MainThread:16786 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: check_version
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send():179] send: header
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: check_version
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:19,155 DEBUG   SenderThread:16786 [sender.py:send():179] send: run
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 DEBUG   SenderThread:16786 [sender.py:send():179] send: summary
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:20,211 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: run_start
--2022-04-06 17:15:20,214 DEBUG   HandlerThread:16786 [meta.py:__init__():39] meta init
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:__init__():53] meta init done
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:probe():210] probe
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():89] save code
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():110] save code done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():57] save pip
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():71] save pip done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_conda():78] save conda
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,240 DEBUG   HandlerThread:16786 [meta.py:_save_conda():86] save conda done
--2022-04-06 17:15:22,241 DEBUG   HandlerThread:16786 [meta.py:probe():252] probe done
--2022-04-06 17:15:22,255 DEBUG   SenderThread:16786 [sender.py:send():179] send: files
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-06 17:15:22,262 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: stop_status
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug.log b/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-deleted file mode 100644
-index 9769176..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:init():369] calling init triggers
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 20, 'epochs': 2, 'batch_size': 64, 'learning_rate_weights': 0.2, 'learning_rate_biases': 0.0048, 'weight_decay': 1e-06, 'lambd': 0.0051, 'clip': 1, 'projector': '768-768', 'print_freq': 100, 'dmodel': 768, 'nhead': 3, 'dfeedforward': 256, 'nlayers': 3, 'dropout': 0.0051, 'tokenizer': 'bert-base-multilingual-cased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():418] starting backend
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():132] starting backend process...
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb b/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
-deleted file mode 100644
-index cd7ebea..0000000
-Binary files a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb and /dev/null differ
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py b/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-deleted file mode 100644
-index f15df21..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch b/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-deleted file mode 100644
-index 0ddeae0..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-+++ /dev/null
-@@ -1,226 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2158287 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,87 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..ee4c0ff 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..29be718 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..bda663d 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/output.log b/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-deleted file mode 100644
-index 4d74c7d..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt b/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-deleted file mode 100644
-index 9eb0f02..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:28:48.101605",
--    "startedAt": "2022-04-08T09:28:45.736549",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-deleted file mode 100644
-index 5708b15..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.139744758605957, "_runtime": 22, "_timestamp": 1649410147, "_step": 1, "epoch_loss": 7.139744758605957}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-deleted file mode 100644
-index e57e276..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,745 DEBUG   MainThread:63630 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send():179] send: header
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:46,531 DEBUG   SenderThread:63630 [sender.py:send():179] send: run
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:48,099 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():39] meta init
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():53] meta init done
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:probe():210] probe
--2022-04-08 14:58:48,107 DEBUG   HandlerThread:63630 [meta.py:_setup_git():200] setup git
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_save_code():89] save code
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_code():110] save code done
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_patches():127] save patches
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():57] save pip
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_conda():78] save conda
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:probe():252] probe done
--2022-04-08 14:58:49,727 DEBUG   SenderThread:63630 [sender.py:send():179] send: files
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,737 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:50,547 DEBUG   SenderThread:63630 [sender.py:send():179] send: config
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:05,549 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:05,549 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-deleted file mode 100644
-index a6875c4..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'd3rkwo1k', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml', 'start_method': 'thread'}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:45,738 INFO    MainThread:63630 [wandb_init.py:init():418] starting backend
--2022-04-08 14:58:45,743 INFO    MainThread:63630 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb b/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py b/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml b/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/config.yaml b/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-deleted file mode 100644
-index d5b49b7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 36
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/diff.patch b/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-deleted file mode 100644
-index 5bddede..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-+++ /dev/null
-@@ -1,228 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..f7a973d 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,89 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..151b958 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..80b3468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..abf5aa3 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145917-fjhaj183
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/output.log b/wandb/run-20220408_145917-fjhaj183/files/output.log
-deleted file mode 100644
-index ceeeb4b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt b/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-deleted file mode 100644
-index 705a1e7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:18.659644",
--    "startedAt": "2022-04-08T09:29:17.328450",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=36",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-deleted file mode 100644
-index 1749cae..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140841484069824, "_runtime": 16, "_timestamp": 1649410173, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log b/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-deleted file mode 100644
-index 6a2ea0b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,338 DEBUG   MainThread:63880 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send():179] send: header
--2022-04-08 14:59:17,342 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:17,943 DEBUG   SenderThread:63880 [sender.py:send():179] send: run
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:18,657 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():39] meta init
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:probe():210] probe
--2022-04-08 14:59:18,665 DEBUG   HandlerThread:63880 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_save_code():89] save code
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:probe():252] probe done
--2022-04-08 14:59:20,075 DEBUG   SenderThread:63880 [sender.py:send():179] send: files
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,086 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:20,978 DEBUG   SenderThread:63880 [sender.py:send():179] send: config
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: history
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug.log b/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-deleted file mode 100644
-index 5f71fa1..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjhaj183', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjhaj183.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 36, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:17,335 INFO    MainThread:63880 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb b/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py b/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml b/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml b/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-deleted file mode 100644
-index 39ea9ed..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 16
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch b/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-deleted file mode 100644
-index 3de404c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-+++ /dev/null
-@@ -1,230 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..1036f20 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,91 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..33a9122 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..622b540 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c775116 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/output.log b/wandb/run-20220408_145943-fjlzyv53/files/output.log
-deleted file mode 100644
-index 0a584f7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt b/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-deleted file mode 100644
-index 321b5fe..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:44.714511",
--    "startedAt": "2022-04-08T09:29:43.530748",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=16",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-deleted file mode 100644
-index 43fa534..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.180241584777832, "_runtime": 16, "_timestamp": 1649410199, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-deleted file mode 100644
-index 1bb5ef6..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,540 DEBUG   MainThread:64131 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send():179] send: header
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:43,999 DEBUG   SenderThread:64131 [sender.py:send():179] send: run
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:44,712 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():39] meta init
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:probe():210] probe
--2022-04-08 14:59:44,720 DEBUG   HandlerThread:64131 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:44,739 DEBUG   HandlerThread:64131 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:44,740 DEBUG   HandlerThread:64131 [meta.py:_save_code():89] save code
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:probe():252] probe done
--2022-04-08 14:59:46,122 DEBUG   SenderThread:64131 [sender.py:send():179] send: files
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,133 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,710 DEBUG   SenderThread:64131 [sender.py:send():179] send: config
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: history
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-deleted file mode 100644
-index 042323c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjlzyv53', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 1024, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:43,537 INFO    MainThread:64131 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb b/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py b/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml b/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150006-abict4v2/files/config.yaml b/wandb/run-20220408_150006-abict4v2/files/config.yaml
-deleted file mode 100644
-index 55505a9..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 20
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 8
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150006-abict4v2/files/diff.patch b/wandb/run-20220408_150006-abict4v2/files/diff.patch
-deleted file mode 100644
-index cae01c4..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/diff.patch
-+++ /dev/null
-@@ -1,232 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..a79a795 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,93 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..baa82b6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..79d1f8d 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..4572147 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150006-abict4v2
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/output.log b/wandb/run-20220408_150006-abict4v2/files/output.log
-deleted file mode 100644
-index 18438a2..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/output.log
-+++ /dev/null
-@@ -1,14 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:261: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
-diff --git a/wandb/run-20220408_150006-abict4v2/files/requirements.txt b/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json b/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-deleted file mode 100644
-index f46fef8..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:08.569102",
--    "startedAt": "2022-04-08T09:30:06.988517",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=20",
--        "--nhead=8",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json b/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-deleted file mode 100644
-index 4c47552..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.120020389556885, "_runtime": 21, "_timestamp": 1649410227, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log b/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-deleted file mode 100644
-index eb4114e..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-+++ /dev/null
-@@ -1,71 +0,0 @@
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,998 DEBUG   MainThread:64393 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send():179] send: header
--2022-04-08 15:00:07,002 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:07,447 DEBUG   SenderThread:64393 [sender.py:send():179] send: run
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,565 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:08,566 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:08,568 DEBUG   HandlerThread:64393 [meta.py:__init__():39] meta init
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:probe():210] probe
--2022-04-08 15:00:08,574 DEBUG   HandlerThread:64393 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_save_code():89] save code
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:probe():252] probe done
--2022-04-08 15:00:10,005 DEBUG   SenderThread:64393 [sender.py:send():179] send: files
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:11,189 DEBUG   SenderThread:64393 [sender.py:send():179] send: config
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:26,191 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:26,191 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: history
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug.log b/wandb/run-20220408_150006-abict4v2/logs/debug.log
-deleted file mode 100644
-index 2782e5f..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug.log
-+++ /dev/null
-@@ -1,51 +0,0 @@
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'abict4v2', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-abict4v2.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 20, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 8, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:06,990 INFO    MainThread:64393 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:06,995 INFO    MainThread:64393 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb b/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py b/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml b/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml b/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-deleted file mode 100644
-index ea14f0e..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch b/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-deleted file mode 100644
-index 47b804f..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-+++ /dev/null
-@@ -1,234 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2248477 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,95 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..165ed2c 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..f1325dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..1413293 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/output.log b/wandb/run-20220408_150037-ba0yl54z/files/output.log
-deleted file mode 100644
-index 6742216..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt b/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-deleted file mode 100644
-index 5a492ae..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:38.254663",
--    "startedAt": "2022-04-08T09:30:37.394479",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=64",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-deleted file mode 100644
-index 662ac89..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.082856178283691, "_runtime": 16, "_timestamp": 1649410253, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-deleted file mode 100644
-index 0c041a1..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,404 DEBUG   MainThread:64646 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 DEBUG   SenderThread:64646 [sender.py:send():179] send: header
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,410 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:37,410 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:37,611 DEBUG   SenderThread:64646 [sender.py:send():179] send: run
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:38,252 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():39] meta init
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:probe():210] probe
--2022-04-08 15:00:38,260 DEBUG   HandlerThread:64646 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_save_code():89] save code
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:probe():252] probe done
--2022-04-08 15:00:39,665 DEBUG   SenderThread:64646 [sender.py:send():179] send: files
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,676 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:39,676 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:40,430 DEBUG   SenderThread:64646 [sender.py:send():179] send: config
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: history
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-deleted file mode 100644
-index 4346748..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'ba0yl54z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 64, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 512, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:37,401 INFO    MainThread:64646 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb b/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py b/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml b/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml b/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-deleted file mode 100644
-index 546bdaa..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 16
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch b/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-deleted file mode 100644
-index c98ba4e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-+++ /dev/null
-@@ -1,285 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ea51a40 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,97 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f8e98b2 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..9304e2b 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b02872b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/output.log b/wandb/run-20220408_153004-dg43ixc4/files/output.log
-deleted file mode 100644
-index f49019d..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt b/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-deleted file mode 100644
-index 109e1b6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:05.796412",
--    "startedAt": "2022-04-08T10:00:04.837672",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=512",
--        "--epochs=16",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-deleted file mode 100644
-index 09cdda6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140233993530273, "_runtime": 15, "_timestamp": 1649412019, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-deleted file mode 100644
-index 9669aaf..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,848 DEBUG   MainThread:65348 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,851 DEBUG   SenderThread:65348 [sender.py:send():179] send: header
--2022-04-08 15:30:04,851 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:04,852 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,022 DEBUG   SenderThread:65348 [sender.py:send():179] send: run
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:05,794 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():39] meta init
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:probe():210] probe
--2022-04-08 15:30:05,802 DEBUG   HandlerThread:65348 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:05,821 DEBUG   HandlerThread:65348 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:05,822 DEBUG   HandlerThread:65348 [meta.py:_save_code():89] save code
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:probe():252] probe done
--2022-04-08 15:30:07,221 DEBUG   SenderThread:65348 [sender.py:send():179] send: files
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,232 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:07,233 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,677 DEBUG   SenderThread:65348 [sender.py:send():179] send: config
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: history
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-deleted file mode 100644
-index 66c14b1..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'dg43ixc4', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 16, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:04,845 INFO    MainThread:65348 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb b/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py b/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml b/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml b/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-deleted file mode 100644
-index 122f33a..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch b/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-deleted file mode 100644
-index 797f0a1..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-+++ /dev/null
-@@ -1,287 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..356076f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,99 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7b452fc 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..48b2ecd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..93be230 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/output.log b/wandb/run-20220408_153027-fwwd5rya/files/output.log
-deleted file mode 100644
-index e86aeca..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-17:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt b/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-deleted file mode 100644
-index dcac75d..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:27.794832",
--    "startedAt": "2022-04-08T10:00:27.031889",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=256",
--        "--epochs=40",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-deleted file mode 100644
-index e70a2b8..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-+++ /dev/null
-@@ -1,99 +0,0 @@
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,040 DEBUG   MainThread:65601 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,046 DEBUG   SenderThread:65601 [sender.py:send():179] send: header
--2022-04-08 15:30:27,046 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:27,047 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,254 DEBUG   SenderThread:65601 [sender.py:send():179] send: run
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: summary
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:27,792 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():39] meta init
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:probe():210] probe
--2022-04-08 15:30:27,800 DEBUG   HandlerThread:65601 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:27,819 DEBUG   HandlerThread:65601 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:27,820 DEBUG   HandlerThread:65601 [meta.py:_save_code():89] save code
--2022-04-08 15:30:27,828 DEBUG   HandlerThread:65601 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:27,829 DEBUG   HandlerThread:65601 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:probe():252] probe done
--2022-04-08 15:30:29,202 DEBUG   SenderThread:65601 [sender.py:send():179] send: files
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:29,214 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: config
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-deleted file mode 100644
-index 987c5d6..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-+++ /dev/null
-@@ -1,84 +0,0 @@
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fwwd5rya', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 256, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:27,038 INFO    MainThread:65601 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:27,039 INFO    MainThread:65601 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb b/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
-deleted file mode 100644
-index bfb12ff..0000000
-Binary files a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py b/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml b/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml b/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch b/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-deleted file mode 100644
-index bd71761..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-+++ /dev/null
-@@ -1,377 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..d3a775c 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,100 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..74ec524 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..c957937 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..287708f 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/output.log b/wandb/run-20220409_152616-3a3gw94y/files/output.log
-deleted file mode 100644
-index 13e9c3e..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt b/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-deleted file mode 100644
-index 20f0482..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:56:17.429229",
--    "startedAt": "2022-04-09T09:56:16.815816",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-deleted file mode 100644
-index 5602f92..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 16, "_timestamp": 1649498192, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-deleted file mode 100644
-index 2546fd3..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,824 DEBUG   MainThread:3266 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,828 DEBUG   SenderThread:3266 [sender.py:send():179] send: header
--2022-04-09 15:26:16,829 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:16,984 DEBUG   SenderThread:3266 [sender.py:send():179] send: run
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:17,426 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():39] meta init
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():53] meta init done
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:probe():210] probe
--2022-04-09 15:26:17,435 DEBUG   HandlerThread:3266 [meta.py:_setup_git():200] setup git
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_save_code():89] save code
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_code():110] save code done
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_patches():127] save patches
--2022-04-09 15:26:17,564 DEBUG   HandlerThread:3266 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:26:17,565 DEBUG   HandlerThread:3266 [meta.py:_save_pip():57] save pip
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_conda():78] save conda
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:probe():252] probe done
--2022-04-09 15:26:19,491 DEBUG   SenderThread:3266 [sender.py:send():179] send: files
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:26:19,497 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:19,831 DEBUG   SenderThread:3266 [sender.py:send():179] send: config
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: history
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-deleted file mode 100644
-index ebbf034..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():418] starting backend
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb b/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py b/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml b/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml b/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch b/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-deleted file mode 100644
-index c3ed101..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-+++ /dev/null
-@@ -1,379 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ed88fe4 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,102 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..4895794 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..1f9d48c 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..dfe2dcb 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/output.log b/wandb/run-20220409_152708-15jgzcwp/files/output.log
-deleted file mode 100644
-index 9a9a49f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt b/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-deleted file mode 100644
-index abaad7d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:57:09.613679",
--    "startedAt": "2022-04-09T09:57:08.966939",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-deleted file mode 100644
-index 0164a0d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 12, "_timestamp": 1649498241, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-deleted file mode 100644
-index de7918e..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,002 DEBUG   MainThread:3540 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,017 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send():179] send: header
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,109 DEBUG   SenderThread:3540 [sender.py:send():179] send: run
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:09,611 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():39] meta init
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():53] meta init done
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:probe():210] probe
--2022-04-09 15:27:09,619 DEBUG   HandlerThread:3540 [meta.py:_setup_git():200] setup git
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_save_code():89] save code
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_code():110] save code done
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_patches():127] save patches
--2022-04-09 15:27:09,693 DEBUG   HandlerThread:3540 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():57] save pip
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_conda():78] save conda
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,002 DEBUG   HandlerThread:3540 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:27:11,003 DEBUG   HandlerThread:3540 [meta.py:probe():252] probe done
--2022-04-09 15:27:11,004 DEBUG   SenderThread:3540 [sender.py:send():179] send: files
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,362 DEBUG   SenderThread:3540 [sender.py:send():179] send: config
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: history
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-deleted file mode 100644
-index 023162f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:27:08,971 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:08,974 INFO    MainThread:3540 [wandb_init.py:init():418] starting backend
--2022-04-09 15:27:08,994 INFO    MainThread:3540 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:27:08,996 INFO    MainThread:3540 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb b/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py b/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-deleted file mode 100644
-index 596bd8d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch b/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-deleted file mode 100644
-index edba74d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-+++ /dev/null
-@@ -1,457 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..6f7f3e6 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,180 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..596bd8d 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7064436 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..3ee4416 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..425ec98 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/output.log b/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-deleted file mode 100644
-index e872735..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt b/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-deleted file mode 100644
-index 39bdbe7..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:31:16.739157",
--    "startedAt": "2022-04-09T10:31:15.626079",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-deleted file mode 100644
-index 96a4906..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 14, "_timestamp": 1649500289, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-deleted file mode 100644
-index 2dc7db1..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,660 DEBUG   MainThread:6109 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 DEBUG   SenderThread:6109 [sender.py:send():179] send: header
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,673 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:01:15,673 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:15,970 DEBUG   SenderThread:6109 [sender.py:send():179] send: run
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:16,736 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():39] meta init
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():53] meta init done
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:probe():210] probe
--2022-04-09 16:01:16,745 DEBUG   HandlerThread:6109 [meta.py:_setup_git():200] setup git
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_save_code():89] save code
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_code():110] save code done
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_patches():127] save patches
--2022-04-09 16:01:16,811 DEBUG   HandlerThread:6109 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():57] save pip
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_conda():78] save conda
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:probe():252] probe done
--2022-04-09 16:01:18,150 DEBUG   SenderThread:6109 [sender.py:send():179] send: files
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,158 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:01:18,158 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,709 DEBUG   SenderThread:6109 [sender.py:send():179] send: config
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: history
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-deleted file mode 100644
-index 87f5666..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--2022-04-09 16:01:15,633 INFO    MainThread:6109 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():418] starting backend
--2022-04-09 16:01:15,655 INFO    MainThread:6109 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:01:15,656 INFO    MainThread:6109 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb b/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py b/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-deleted file mode 100644
-index feaf1fc..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch b/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-deleted file mode 100644
-index eec0ab3..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-+++ /dev/null
-@@ -1,459 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..8b42533 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,182 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..feaf1fc 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..e712296 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b2fc627 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..337b531 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/output.log b/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-deleted file mode 100644
-index e15e9a4..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-+++ /dev/null
-@@ -1,17 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt b/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-deleted file mode 100644
-index f4efc7b..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:32:47.190940",
--    "startedAt": "2022-04-09T10:32:46.030719",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-deleted file mode 100644
-index 59ceedf..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 18, "_timestamp": 1649500384, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-deleted file mode 100644
-index 4dae842..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,040 DEBUG   MainThread:6410 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send():179] send: header
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:46,151 DEBUG   SenderThread:6410 [sender.py:send():179] send: run
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:47,188 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():39] meta init
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():53] meta init done
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:probe():210] probe
--2022-04-09 16:02:47,197 DEBUG   HandlerThread:6410 [meta.py:_setup_git():200] setup git
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_save_code():89] save code
--2022-04-09 16:02:47,224 DEBUG   HandlerThread:6410 [meta.py:_save_code():110] save code done
--2022-04-09 16:02:47,225 DEBUG   HandlerThread:6410 [meta.py:_save_patches():127] save patches
--2022-04-09 16:02:47,270 DEBUG   HandlerThread:6410 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():57] save pip
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_conda():78] save conda
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:probe():252] probe done
--2022-04-09 16:02:48,639 DEBUG   SenderThread:6410 [sender.py:send():179] send: files
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,649 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:49,267 DEBUG   SenderThread:6410 [sender.py:send():179] send: config
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,268 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:03:04,269 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:03:04,791 DEBUG   SenderThread:6410 [sender.py:send():179] send: history
--2022-04-09 16:03:04,792 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-deleted file mode 100644
-index c4edd31..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():418] starting backend
--2022-04-09 16:02:46,037 INFO    MainThread:6410 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb b/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py b/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-deleted file mode 100644
-index 182fd97..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-+++ /dev/null
-@@ -1,378 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch b/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-deleted file mode 100644
-index 2c51f6a..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-+++ /dev/null
-@@ -1,470 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..507a499 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,192 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..182fd97 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,98 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..2224b92 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..94d02b9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f7361e5 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/output.log b/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-deleted file mode 100644
-index 35bceac..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-+++ /dev/null
-@@ -1,18 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt b/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-deleted file mode 100644
-index 440569b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:34:10.122598",
--    "startedAt": "2022-04-09T10:34:09.149412",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-deleted file mode 100644
-index 52da06b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 27, "_timestamp": 1649500476, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-deleted file mode 100644
-index bf89eff..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,159 DEBUG   MainThread:6703 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send():179] send: header
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:09,250 DEBUG   SenderThread:6703 [sender.py:send():179] send: run
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:10,119 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():39] meta init
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():53] meta init done
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:probe():210] probe
--2022-04-09 16:04:10,130 DEBUG   HandlerThread:6703 [meta.py:_setup_git():200] setup git
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_save_code():89] save code
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_code():110] save code done
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_patches():127] save patches
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_pip():57] save pip
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_conda():78] save conda
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:probe():252] probe done
--2022-04-09 16:04:11,658 DEBUG   SenderThread:6703 [sender.py:send():179] send: files
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,667 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:11,669 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:12,396 DEBUG   SenderThread:6703 [sender.py:send():179] send: config
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:27,397 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:27,397 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: history
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:39,168 DEBUG   SenderThread:6703 [sender.py:send():179] send: stats
--2022-04-09 16:04:44,241 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:44,241 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:59,736 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:59,737 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-deleted file mode 100644
-index 0fbab81..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-+++ /dev/null
-@@ -1,54 +0,0 @@
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():418] starting backend
--2022-04-09 16:04:09,156 INFO    MainThread:6703 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:04:09,157 INFO    MainThread:6703 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb b/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
-deleted file mode 100644
-index 81c67b9..0000000
-Binary files a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py b/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml b/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/config.yaml b/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-deleted file mode 100644
-index 1ebd7db..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/diff.patch b/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-deleted file mode 100644
-index 9c4e2ae..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-+++ /dev/null
-@@ -1,482 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2d0dffc 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,202 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..18dd535 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b8703a2 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7af087b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160908-2097uoqw
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/output.log b/wandb/run-20220409_160908-2097uoqw/files/output.log
-deleted file mode 100644
-index ed7c7b5..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt b/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-deleted file mode 100644
-index 3cf53b0..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:39:09.049034",
--    "startedAt": "2022-04-09T10:39:08.174640",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-deleted file mode 100644
-index 225791e..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5264.9873046875, "_runtime": 162, "_timestamp": 1649500910, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log b/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-deleted file mode 100644
-index 1baf812..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-+++ /dev/null
-@@ -1,1238 +0,0 @@
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,183 DEBUG   MainThread:7244 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 DEBUG   SenderThread:7244 [sender.py:send():179] send: header
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,187 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:09:08,187 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:08,556 DEBUG   SenderThread:7244 [sender.py:send():179] send: run
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:09,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():39] meta init
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():53] meta init done
--2022-04-09 16:09:09,049 DEBUG   HandlerThread:7244 [meta.py:probe():210] probe
--2022-04-09 16:09:09,055 DEBUG   HandlerThread:7244 [meta.py:_setup_git():200] setup git
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_save_code():89] save code
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_code():110] save code done
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_patches():127] save patches
--2022-04-09 16:09:09,148 DEBUG   HandlerThread:7244 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:09:09,149 DEBUG   HandlerThread:7244 [meta.py:_save_pip():57] save pip
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_conda():78] save conda
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:probe():252] probe done
--2022-04-09 16:09:10,559 DEBUG   SenderThread:7244 [sender.py:send():179] send: files
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,033 DEBUG   SenderThread:7244 [sender.py:send():179] send: config
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:26,037 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:26,037 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:37,780 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:41,491 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:41,492 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:08,466 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:10:12,367 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:12,368 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:27,818 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:27,818 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:43,478 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:43,478 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,373 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:05,374 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:08,654 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:14,750 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:14,750 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:32,169 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:32,169 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:39,457 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:48,462 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:48,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:03,967 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:12:03,968 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
--2022-04-09 16:12:05,938 INFO    MainThread:7244 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 16:12:05,939 INFO    MainThread:7244 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:12:06,150 DEBUG   SenderThread:7244 [sender.py:send():179] send: telemetry
--2022-04-09 16:12:06,151 DEBUG   SenderThread:7244 [sender.py:send():179] send: exit
--2022-04-09 16:12:06,151 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():295] send defer
--2022-04-09 16:12:06,153 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:06,155 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,155 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 16:12:06,155 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:06,156 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 16:12:06,158 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,158 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 16:12:06,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:12:06,227 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,227 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 16:12:06,228 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,228 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 16:12:06,229 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,229 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 16:12:06,229 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,229 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 16:12:06,259 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,450 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:06,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:07,230 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 16:12:07,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,231 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,231 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 16:12:07,231 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:07,232 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:12:07,333 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:07,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:07,453 INFO    SenderThread:7244 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt requirements.txt
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:12:07,455 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log output.log
--2022-04-09 16:12:07,456 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:12:07,457 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:12:07,467 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml config.yaml
--2022-04-09 16:12:07,468 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch diff.patch
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 16:12:07,508 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,510 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,510 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 16:12:07,510 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:07,511 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 16:12:07,512 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,512 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 16:12:07,512 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,513 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 16:12:07,612 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,484 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 16:12:08,485 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,486 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,486 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 16:12:08,487 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 16:12:08,487 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41552
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,489 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,489 DEBUG   SenderThread:7244 [sender.py:send():179] send: final
--2022-04-09 16:12:08,490 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send():179] send: footer
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,490 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 16:12:08,591 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,591 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,593 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,695 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,695 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,696 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,798 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,798 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,799 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,848 INFO    Thread-33 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:08,900 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,901 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,902 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,004 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,005 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,006 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,108 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,109 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,110 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,212 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,213 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,214 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,316 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,317 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,318 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,420 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,421 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,422 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,524 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,525 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,526 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,628 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,629 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,630 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,732 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,733 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,734 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,837 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,838 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,840 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,875 INFO    Thread-32 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:09,942 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,942 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,944 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,046 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,047 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,149 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,150 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,151 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,253 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,254 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,255 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,304 INFO    Thread-29 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:12:10,357 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,358 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,359 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,461 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,463 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,772 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,772 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,772 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,874 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,874 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,876 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,978 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,979 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,980 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,082 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,082 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,084 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,186 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,186 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,188 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,290 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,290 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,292 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,314 INFO    Thread-30 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:11,394 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,394 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,396 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,498 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,499 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,500 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,602 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,603 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,604 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,706 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,707 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,708 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,810 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,810 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,812 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,914 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,915 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,916 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,018 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,019 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,020 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,122 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,122 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,124 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,226 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,228 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,330 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,330 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,332 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,434 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,435 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,436 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,538 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,538 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,540 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,642 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,642 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,644 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,746 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,746 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,747 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,850 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,850 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,852 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,954 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,954 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,955 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,057 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,058 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,059 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,161 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,162 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,163 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,265 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,266 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,267 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,369 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,370 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,371 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,473 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,473 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,475 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,577 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,577 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,578 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,680 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,681 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,682 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,784 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,785 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,786 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,888 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,889 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,890 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,992 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,993 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,994 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,096 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,097 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,098 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,200 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,201 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,202 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,304 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,305 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,307 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,409 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,410 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,411 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,513 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,514 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,515 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,617 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,618 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,619 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,721 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,721 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,723 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,826 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,827 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,829 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,931 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,931 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,933 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,034 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,035 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,037 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,138 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,139 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,141 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,244 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,244 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,245 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,348 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,348 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,350 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,453 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,454 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,461 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,773 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,773 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,775 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,877 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,877 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,879 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,981 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,982 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,983 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,085 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,086 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,087 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,189 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,190 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,191 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,293 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,294 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,295 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,397 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,398 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,399 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,501 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,502 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,503 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,605 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,606 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,607 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,709 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,710 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,711 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,813 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,814 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,816 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,918 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,919 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,920 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,022 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,023 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,024 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,126 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,127 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,128 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,230 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,232 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,334 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,335 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,336 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,374 INFO    Thread-31 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:12:17,438 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,438 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,440 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,542 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,543 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,544 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,646 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,647 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,647 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:17,648 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,650 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 16:12:17,653 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 16:12:17,656 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 16:12:17,656 INFO    HandlerThread:7244 [handler.py:finish():638] shutting down handler
--2022-04-09 16:12:18,493 INFO    WriterThread:7244 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:12:18,647 INFO    SenderThread:7244 [sender.py:finish():933] shutting down sender
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:18,661 INFO    MainThread:7244 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 16:12:18,662 INFO    MainThread:7244 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 16:12:18,663 INFO    MainThread:7244 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 16:12:18,709 INFO    MainThread:7244 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug.log b/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-deleted file mode 100644
-index ad8f755..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-+++ /dev/null
-@@ -1,77 +0,0 @@
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug.log
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():418] starting backend
--2022-04-09 16:09:08,180 INFO    MainThread:7244 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
-diff --git a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb b/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
-deleted file mode 100644
-index b5995f1..0000000
-Binary files a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py b/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml b/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/config.yaml b/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/diff.patch b/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-deleted file mode 100644
-index aa6c773..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-+++ /dev/null
-@@ -1,528 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2aaecf9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,248 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..91bb884 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..252e468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c99b343 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_161421-3t82t88x
--\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/output.log b/wandb/run-20220409_161421-3t82t88x/files/output.log
-deleted file mode 100644
-index 3bf650b..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/output.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt b/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-deleted file mode 100644
-index f9df6f1..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:44:23.094487",
--    "startedAt": "2022-04-09T10:44:21.821617",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log b/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-deleted file mode 100644
-index 3f70132..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,831 DEBUG   MainThread:8815 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send():179] send: header
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:21,939 DEBUG   SenderThread:8815 [sender.py:send():179] send: run
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,090 DEBUG   SenderThread:8815 [sender.py:send():179] send: summary
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:23,092 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():39] meta init
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():53] meta init done
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:probe():210] probe
--2022-04-09 16:14:23,100 DEBUG   HandlerThread:8815 [meta.py:_setup_git():200] setup git
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_save_code():89] save code
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_code():110] save code done
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_patches():127] save patches
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_pip():57] save pip
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_conda():78] save conda
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,537 DEBUG   HandlerThread:8815 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:14:24,538 DEBUG   HandlerThread:8815 [meta.py:probe():252] probe done
--2022-04-09 16:14:24,539 DEBUG   SenderThread:8815 [sender.py:send():179] send: files
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,548 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:24,548 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:25,577 DEBUG   SenderThread:8815 [sender.py:send():179] send: config
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:40,579 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:40,579 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:51,743 DEBUG   SenderThread:8815 [sender.py:send():179] send: stats
--2022-04-09 16:14:56,424 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:56,424 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:15:01,820 DEBUG   SenderThread:8815 [sender.py:send():179] send: history
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug.log b/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-deleted file mode 100644
-index 99b6b97..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug.log
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():418] starting backend
--2022-04-09 16:14:21,828 INFO    MainThread:8815 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb b/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
-deleted file mode 100644
-index a4486ce..0000000
-Binary files a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py b/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml b/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/config.yaml b/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/diff.patch b/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-deleted file mode 100644
-index 9eddab1..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-+++ /dev/null
-@@ -1,560 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..353da1f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,249 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f0332eb 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..97853e9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7be71e2 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_162621-m83puhmm
--\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/output.log b/wandb/run-20220409_162621-m83puhmm/files/output.log
-deleted file mode 100644
-index ee1c9e3..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/output.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt b/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-deleted file mode 100644
-index 4ce8f76..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:56:22.902051",
--    "startedAt": "2022-04-09T10:56:21.924771",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log b/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-deleted file mode 100644
-index 7032449..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,934 DEBUG   MainThread:9280 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:26:21,937 DEBUG   SenderThread:9280 [sender.py:send():179] send: header
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:21,938 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,344 DEBUG   SenderThread:9280 [sender.py:send():179] send: run
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,889 DEBUG   SenderThread:9280 [sender.py:send():179] send: summary
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:22,895 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():39] meta init
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():53] meta init done
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:probe():210] probe
--2022-04-09 16:26:22,908 DEBUG   HandlerThread:9280 [meta.py:_setup_git():200] setup git
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_save_code():89] save code
--2022-04-09 16:26:22,972 DEBUG   HandlerThread:9280 [meta.py:_save_code():110] save code done
--2022-04-09 16:26:22,973 DEBUG   HandlerThread:9280 [meta.py:_save_patches():127] save patches
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():57] save pip
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_conda():78] save conda
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:probe():252] probe done
--2022-04-09 16:26:24,440 DEBUG   SenderThread:9280 [sender.py:send():179] send: files
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:24,448 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:24,898 DEBUG   SenderThread:9280 [sender.py:send():179] send: config
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:39,905 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:39,905 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:51,624 DEBUG   SenderThread:9280 [sender.py:send():179] send: stats
--2022-04-09 16:26:55,340 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:55,340 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:27:06,912 DEBUG   SenderThread:9280 [sender.py:send():179] send: history
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug.log b/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-deleted file mode 100644
-index 5053427..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():418] starting backend
--2022-04-09 16:26:21,931 INFO    MainThread:9280 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb b/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
-deleted file mode 100644
-index 978cbe5..0000000
-Binary files a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py b/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-deleted file mode 100644
-index 1988ff1..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 1
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 1
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch b/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-deleted file mode 100644
-index d503875..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-+++ /dev/null
-@@ -1,561 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..b0966e9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,250 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..1486dd6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..071678f 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..be8b91a 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf
--\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/output.log b/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-deleted file mode 100644
-index f4f17d5..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt b/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-deleted file mode 100644
-index 6c00633..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:09:01.944494",
--    "startedAt": "2022-04-09T12:09:01.199712",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-deleted file mode 100644
-index c0804b4..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5045.823547363281, "_runtime": 154, "_timestamp": 1649506295, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-deleted file mode 100644
-index 67f5897..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-+++ /dev/null
-@@ -1,418 +0,0 @@
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,208 DEBUG   MainThread:10760 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send():179] send: header
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,337 DEBUG   SenderThread:10760 [sender.py:send():179] send: run
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:01,942 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():39] meta init
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():53] meta init done
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:probe():210] probe
--2022-04-09 17:39:01,950 DEBUG   HandlerThread:10760 [meta.py:_setup_git():200] setup git
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_save_code():89] save code
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_code():110] save code done
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_patches():127] save patches
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():57] save pip
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_conda():78] save conda
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:probe():252] probe done
--2022-04-09 17:39:03,362 DEBUG   SenderThread:10760 [sender.py:send():179] send: files
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,372 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:03,372 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,822 DEBUG   SenderThread:10760 [sender.py:send():179] send: config
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:18,825 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:18,826 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:30,755 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:39:34,298 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:34,298 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:01,384 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:05,203 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:05,204 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,724 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:20,725 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,136 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:27,137 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:32,273 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:36,248 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:36,249 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:51,681 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:51,682 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:02,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,142 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:07,142 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:22,870 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:22,871 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:33,728 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,321 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:38,322 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: telemetry
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: exit
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():295] send defer
--2022-04-09 17:41:51,004 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,005 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,006 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,006 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 17:41:51,007 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,008 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,008 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 17:41:51,009 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 17:41:51,009 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,010 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 17:41:51,062 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,062 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:51,063 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,063 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 17:41:51,064 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,064 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 17:41:51,064 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 17:41:51,065 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,065 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 17:41:51,109 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,203 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:51,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:51,546 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 17:41:51,546 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,546 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,546 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,546 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 17:41:51,547 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 17:41:51,648 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt requirements.txt
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log output.log
--2022-04-09 17:41:52,208 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 17:41:52,209 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json wandb-summary.json
--2022-04-09 17:41:52,218 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml config.yaml
--2022-04-09 17:41:52,220 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch diff.patch
--2022-04-09 17:41:52,222 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py code/train_translation.py
--2022-04-09 17:41:52,224 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 17:41:52,224 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 17:41:52,225 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 17:41:52,225 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,226 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,226 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 17:41:52,328 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,842 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 17:41:52,842 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,844 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,844 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 17:41:52,845 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,846 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 17:41:52,848 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,848 DEBUG   SenderThread:10760 [sender.py:send():179] send: final
--2022-04-09 17:41:52,849 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 17:41:52,849 DEBUG   SenderThread:10760 [sender.py:send():179] send: footer
--2022-04-09 17:41:52,850 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,850 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 17:41:52,947 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,947 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,948 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,049 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,050 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,051 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 45730
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,153 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,153 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,155 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,256 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,257 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,258 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,360 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,361 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,362 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,464 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,465 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,466 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,502 INFO    Thread-33 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:53,504 INFO    Thread-29 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:41:53,512 INFO    Thread-32 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:53,524 INFO    Thread-31 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:41:53,568 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,568 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,569 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,671 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,672 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,673 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,775 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,776 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,777 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,879 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,879 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,881 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,983 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,983 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,984 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,033 INFO    Thread-30 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:54,086 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,087 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,088 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,190 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,190 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,192 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,294 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,294 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,294 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:54,295 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,297 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 17:41:54,299 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 17:41:54,302 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 17:41:54,302 INFO    HandlerThread:10760 [handler.py:finish():638] shutting down handler
--2022-04-09 17:41:54,849 INFO    WriterThread:10760 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [sender.py:finish():933] shutting down sender
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:55,308 INFO    MainThread:10760 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 17:41:55,309 INFO    MainThread:10760 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 17:41:55,310 INFO    MainThread:10760 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 17:41:55,323 INFO    MainThread:10760 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-deleted file mode 100644
-index 2ea4289..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-+++ /dev/null
-@@ -1,73 +0,0 @@
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():418] starting backend
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb b/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
-deleted file mode 100644
-index c939775..0000000
-Binary files a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py b/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml b/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/config.yaml b/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-deleted file mode 100644
-index 0b2ef04..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 24
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/diff.patch b/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-deleted file mode 100644
-index a6f8b6d..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-+++ /dev/null
-@@ -1,634 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e11eb21 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,302 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..a3e7597 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..453b7bc 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b2d6ded 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_175151-z44hpswp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/output.log b/wandb/run-20220409_175151-z44hpswp/files/output.log
-deleted file mode 100644
-index 2224687..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/output.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--translation model saved in checkpoint
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt b/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-deleted file mode 100644
-index e3bc5e0..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:21:52.829321",
--    "startedAt": "2022-04-09T12:21:51.786614",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=24",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-deleted file mode 100644
-index 4d8b4c3..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 107.22583770751953, "_runtime": 695, "_timestamp": 1649507606, "_step": 28, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log b/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-deleted file mode 100644
-index 552d2f2..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,620 +0,0 @@
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,796 DEBUG   MainThread:14720 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send():179] send: header
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,171 DEBUG   SenderThread:14720 [sender.py:send():179] send: run
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,825 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:52,827 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():39] meta init
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():53] meta init done
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:probe():210] probe
--2022-04-09 17:51:52,837 DEBUG   HandlerThread:14720 [meta.py:_setup_git():200] setup git
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_save_code():89] save code
--2022-04-09 17:51:52,876 DEBUG   HandlerThread:14720 [meta.py:_save_code():110] save code done
--2022-04-09 17:51:52,877 DEBUG   HandlerThread:14720 [meta.py:_save_patches():127] save patches
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():57] save pip
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_conda():78] save conda
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:probe():252] probe done
--2022-04-09 17:51:54,261 DEBUG   SenderThread:14720 [sender.py:send():179] send: files
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,272 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:51:54,272 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: config
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:09,721 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:09,721 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:21,569 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:25,148 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:25,149 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:52,213 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,140 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:56,140 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:11,596 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:11,597 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:23,054 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:27,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:27,074 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:42,499 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:42,500 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:53,596 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:57,929 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:57,929 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:59,413 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:59,414 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:13,359 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:13,359 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,344 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:54:20,345 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:24,527 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:28,793 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:28,793 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:44,227 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:44,227 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:55,062 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:59,653 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:59,653 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:11,338 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:11,339 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:15,098 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:15,099 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:25,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:30,519 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:30,519 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:45,955 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:45,956 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:56,468 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:01,589 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:17,078 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:17,078 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:27,343 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:56:32,522 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:32,522 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:47,961 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:47,961 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:57,925 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:03,390 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:03,390 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:18,853 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:18,853 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:28,552 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:34,280 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:34,280 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:49,734 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:49,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:59,325 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,341 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:05,342 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:20,790 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:20,790 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:29,955 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:36,214 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:36,214 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:51,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:51,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:00,845 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:07,147 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:07,147 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:22,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:22,588 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:31,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:38,008 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:38,008 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:53,449 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:53,450 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:02,140 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:08,884 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:08,884 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:13,617 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:13,618 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:24,366 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:24,367 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:32,786 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:39,806 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:39,806 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,224 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:55,225 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,715 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:00,716 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:03,610 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:10,649 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:10,649 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:26,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:26,073 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:34,217 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:41,491 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:41,492 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,993 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:43,994 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:56,918 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:56,918 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:04,763 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:12,340 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:12,340 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:35,408 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:43,201 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:43,201 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:44,434 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:44,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:58,647 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:58,647 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:06,291 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:14,117 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:14,117 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,051 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:26,052 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:29,557 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:29,559 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:36,939 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:03:42,324 INFO    MainThread:14720 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:03:43,079 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:43,080 DEBUG   SenderThread:14720 [sender.py:send():179] send: telemetry
--2022-04-09 18:03:43,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:43,580 DEBUG   SenderThread:14720 [sender.py:send():179] send: exit
--2022-04-09 18:03:43,580 INFO    SenderThread:14720 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:03:43,581 INFO    SenderThread:14720 [sender.py:send_exit():295] send defer
--2022-04-09 18:03:43,581 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:43,582 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,583 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:03:43,583 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:03:43,584 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 48639
--}
--
--2022-04-09 18:03:43,585 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,586 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:03:43,657 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,657 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:43,658 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,658 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:03:43,660 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,660 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:03:43,686 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:44,248 INFO    SenderThread:14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt requirements.txt
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log output.log
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml config.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch diff.patch
--2022-04-09 18:03:44,251 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:03:44,253 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:03:44,253 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,254 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,258 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:03:44,260 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,260 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:03:44,261 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,261 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:03:44,261 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,261 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:03:44,361 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,907 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:03:44,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,908 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,908 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:03:44,909 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,909 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:03:44,910 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,910 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: final
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: footer
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,911 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:03:45,010 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,011 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,012 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,115 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,116 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,117 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,219 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,219 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,221 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,323 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,323 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,325 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,427 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,427 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,428 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,466 INFO    Thread-54 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 18:03:45,472 INFO    Thread-52 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 18:03:45,476 INFO    Thread-53 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:45,530 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,531 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,532 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,636 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,738 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,739 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,740 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,842 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,842 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,844 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,946 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,946 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,948 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,050 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,051 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,053 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,155 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,156 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,157 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,184 INFO    Thread-56 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:46,188 INFO    Thread-55 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:46,259 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,259 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,261 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,363 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,364 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,365 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,468 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,469 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,469 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:46,470 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,472 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:03:46,474 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:03:46,477 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:03:46,478 INFO    HandlerThread:14720 [handler.py:finish():638] shutting down handler
--2022-04-09 18:03:46,911 INFO    WriterThread:14720 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 18:03:47,469 INFO    SenderThread:14720 [sender.py:finish():933] shutting down sender
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:47,483 INFO    MainThread:14720 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:03:47,484 INFO    MainThread:14720 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:03:47,485 INFO    MainThread:14720 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:03:47,525 INFO    MainThread:14720 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug.log b/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-deleted file mode 100644
-index bb769fe..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-+++ /dev/null
-@@ -1,140 +0,0 @@
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'z44hpswp', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-z44hpswp.yaml', 'start_method': 'thread'}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug.log
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 24, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():418] starting backend
--2022-04-09 17:51:51,793 INFO    MainThread:14720 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
-diff --git a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb b/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
-deleted file mode 100644
-index 55f1aff..0000000
-Binary files a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py b/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml b/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml b/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-deleted file mode 100644
-index 194d831..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch b/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-deleted file mode 100644
-index 979dcc5..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-+++ /dev/null
-@@ -1,645 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..42fbde8 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,313 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..371ace5 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..a6d9884 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..705068b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z
--\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/output.log b/wandb/run-20220409_180353-vjrenr4z/files/output.log
-deleted file mode 100644
-index a2bf91c..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/output.log
-+++ /dev/null
-@@ -1,102 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--translation model saved in checkpoint
--{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--translation model saved in checkpoint
--{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--translation model saved in checkpoint
--{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--translation model saved in checkpoint
--{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--translation model saved in checkpoint
--{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--translation model saved in checkpoint
--{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--translation model saved in checkpoint
--{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--translation model saved in checkpoint
--{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--translation model saved in checkpoint
--{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--translation model saved in checkpoint
--{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--translation model saved in checkpoint
--{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--translation model saved in checkpoint
--{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--translation model saved in checkpoint
--{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--translation model saved in checkpoint
--{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--translation model saved in checkpoint
--{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--translation model saved in checkpoint
--{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--translation model saved in checkpoint
--{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--translation model saved in checkpoint
--{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--translation model saved in checkpoint
--{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--translation model saved in checkpoint
--{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--translation model saved in checkpoint
--{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--translation model saved in checkpoint
--{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--translation model saved in checkpoint
--{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--translation model saved in checkpoint
--{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--translation model saved in checkpoint
--{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--translation model saved in checkpoint
--{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--translation model saved in checkpoint
--{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--translation model saved in checkpoint
--{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--translation model saved in checkpoint
--{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--translation model saved in checkpoint
--{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--translation model saved in checkpoint
--{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--translation model saved in checkpoint
--{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--translation model saved in checkpoint
--{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--translation model saved in checkpoint
--{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--translation model saved in checkpoint
--{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt b/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-deleted file mode 100644
-index 3e24107..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:33:55.138080",
--    "startedAt": "2022-04-09T12:33:53.912960",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=40",
--        "--nhead=4",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-deleted file mode 100644
-index dbd5bb9..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 571.8498382568359, "_runtime": 1394, "_timestamp": 1649509027, "_step": 47, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-deleted file mode 100644
-index 6ac5722..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,809 +0,0 @@
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,947 DEBUG   MainThread:18842 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 DEBUG   SenderThread:18842 [sender.py:send():179] send: header
--2022-04-09 18:03:53,957 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:53,958 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:54,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: run
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:55,130 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():39] meta init
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():53] meta init done
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:probe():210] probe
--2022-04-09 18:03:55,146 DEBUG   HandlerThread:18842 [meta.py:_setup_git():200] setup git
--2022-04-09 18:03:55,213 DEBUG   HandlerThread:18842 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:03:55,214 DEBUG   HandlerThread:18842 [meta.py:_save_code():89] save code
--2022-04-09 18:03:55,241 DEBUG   HandlerThread:18842 [meta.py:_save_code():110] save code done
--2022-04-09 18:03:55,242 DEBUG   HandlerThread:18842 [meta.py:_save_patches():127] save patches
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():57] save pip
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_conda():78] save conda
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,710 DEBUG   HandlerThread:18842 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:03:56,711 DEBUG   HandlerThread:18842 [meta.py:probe():252] probe done
--2022-04-09 18:03:56,713 DEBUG   SenderThread:18842 [sender.py:send():179] send: files
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,723 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:56,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: config
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:12,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:12,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:23,959 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:27,637 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:27,637 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:43,070 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:43,071 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:54,578 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:58,609 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:58,609 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,096 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:14,096 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:25,318 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:05:29,536 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:29,536 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,041 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:45,042 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:55,878 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:00,385 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:00,385 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,115 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:12,116 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:15,812 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:15,812 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:26,509 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:31,252 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:31,252 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:46,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:46,699 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:57,088 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:02,128 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:02,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:17,560 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:17,560 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:27,788 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:33,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:33,039 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:58,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:03,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:03,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:10,495 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:10,496 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,773 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:16,774 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:19,358 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:19,358 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:29,127 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:34,827 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:34,827 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:50,258 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:50,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:59,791 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:05,625 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:05,625 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:21,079 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:21,079 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:30,544 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:36,425 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:36,426 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,629 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:37,630 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:51,758 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:51,758 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:01,192 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:07,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:22,576 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:22,576 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,752 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:37,928 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:37,928 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:02,406 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:08,610 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:08,610 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:23,966 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:23,966 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:33,001 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:39,600 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:39,600 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:54,944 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:54,944 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:03,627 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:10,280 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:10,280 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:25,635 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:25,635 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:34,297 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:40,989 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:40,989 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:56,322 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:56,323 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:05,226 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:11,687 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:11,687 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:27,035 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:27,035 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:35,749 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:42,474 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:42,475 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:57,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:06,507 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:13,240 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:13,240 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,985 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:26,986 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:28,667 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:28,668 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:37,148 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:44,310 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:44,310 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:59,666 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:59,666 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:07,695 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:14,998 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:14,998 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:30,334 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:30,334 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:38,429 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:45,673 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:45,673 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:01,020 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:01,020 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:09,031 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:16,349 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:16,349 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:39,689 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:47,261 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:47,261 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:02,605 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:02,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:10,351 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:17,935 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:17,935 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:33,308 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:33,308 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,998 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:44,097 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:44,098 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:48,657 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:48,817 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:11,869 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:20,065 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:20,065 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,258 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:50,780 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:50,780 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:06,176 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:06,176 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:12,884 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:21,533 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:21,533 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:43,542 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:52,222 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:52,222 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:07,575 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:07,575 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:14,395 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:22,919 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:22,920 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:38,284 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:38,284 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:44,947 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:53,719 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:53,719 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:09,154 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:09,154 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:15,554 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:24,513 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:24,513 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,048 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:32,049 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:39,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:39,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:46,176 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:55,292 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:55,292 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:10,678 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:10,679 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:16,761 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:26,337 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:26,337 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:41,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:41,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:43,842 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:43,843 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:47,574 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:57,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:57,038 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:12,473 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:12,473 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:18,151 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:27,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:27,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:43,266 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:43,266 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:48,907 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:58,729 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:58,729 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,447 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:03,448 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:14,167 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:14,167 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:19,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:29,519 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:29,520 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:44,877 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:44,877 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:50,128 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:00,259 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:00,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:20,792 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:30,948 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:30,948 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,976 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:38,977 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:46,374 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:46,374 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:51,548 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:01,722 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:01,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:03,261 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:03,262 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:17,072 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:17,072 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:22,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:26:32,410 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:32,411 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:47,810 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:47,810 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:52,753 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,241 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:03,241 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:18,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:18,700 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:23,342 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:34,106 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:34,107 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
--2022-04-09 18:27:39,696 INFO    MainThread:18842 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:27:39,697 INFO    MainThread:18842 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:27:40,003 DEBUG   SenderThread:18842 [sender.py:send():179] send: telemetry
--2022-04-09 18:27:40,004 DEBUG   SenderThread:18842 [sender.py:send():179] send: exit
--2022-04-09 18:27:40,005 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,005 INFO    SenderThread:18842 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:27:40,006 INFO    SenderThread:18842 [sender.py:send_exit():295] send defer
--2022-04-09 18:27:40,006 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,008 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,008 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:27:40,008 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,010 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:27:40,011 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,011 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:40,067 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,067 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:27:40,069 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,069 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:27:40,110 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:40,461 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:27:40,462 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,463 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,464 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:27:40,464 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,465 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,465 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:27:40,466 INFO    SenderThread:18842 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:27:40,566 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:41,202 INFO    SenderThread:18842 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:27:41,205 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt requirements.txt
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log output.log
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:27:41,207 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml config.yaml
--2022-04-09 18:27:41,211 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch diff.patch
--2022-04-09 18:27:41,220 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:27:41,223 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:27:41,224 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,225 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,225 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:27:41,225 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,226 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,226 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:27:41,230 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:41,231 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:27:41,232 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,232 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:27:41,232 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,232 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:27:41,332 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,915 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:27:41,915 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,917 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,917 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:27:41,918 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,919 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:27:41,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,921 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:27:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: final
--2022-04-09 18:27:41,922 DEBUG   SenderThread:18842 [sender.py:send():179] send: footer
--2022-04-09 18:27:41,923 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,923 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:27:42,024 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,024 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,025 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,127 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,129 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,231 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,231 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,233 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,335 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,335 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,336 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,438 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,439 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,440 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,542 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,542 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,544 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,592 INFO    Thread-73 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:27:42,594 INFO    Thread-71 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:27:42,599 INFO    Thread-75 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:42,601 INFO    Thread-72 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:42,602 INFO    Thread-74 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:42,645 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,645 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,646 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,747 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,748 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,749 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,851 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,851 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,852 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:42,853 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,855 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:27:42,857 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:27:42,860 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:27:42,861 INFO    HandlerThread:18842 [handler.py:finish():638] shutting down handler
--2022-04-09 18:27:42,922 INFO    WriterThread:18842 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:27:43,852 INFO    SenderThread:18842 [sender.py:finish():933] shutting down sender
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:27:43,868 INFO    MainThread:18842 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:27:43,884 INFO    MainThread:18842 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-deleted file mode 100644
-index 55b000f..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-+++ /dev/null
-@@ -1,230 +0,0 @@
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'vjrenr4z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml', 'start_method': 'thread'}
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:53,921 INFO    MainThread:18842 [wandb_init.py:init():418] starting backend
--2022-04-09 18:03:53,941 INFO    MainThread:18842 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:03:53,943 INFO    MainThread:18842 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
-diff --git a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb b/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
-deleted file mode 100644
-index 2a205f7..0000000
-Binary files a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py b/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml b/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_182749-paufev36/files/config.yaml b/wandb/run-20220409_182749-paufev36/files/config.yaml
-deleted file mode 100644
-index c4a0d20..0000000
---- a/wandb/run-20220409_182749-paufev36/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_182749-paufev36/files/diff.patch b/wandb/run-20220409_182749-paufev36/files/diff.patch
-deleted file mode 100644
-index 17f6c34..0000000
---- a/wandb/run-20220409_182749-paufev36/files/diff.patch
-+++ /dev/null
-@@ -1,694 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e8bd4e3 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,362 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--+{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--+{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--+{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--+{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--+{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--+{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--+{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--+{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--+{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--+{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--+{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--+{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--+{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--+{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--+{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--+{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--+{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--+{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--+{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--+{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--+{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--+{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--+{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--+{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--+{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--+{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--+{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--+{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--+{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--+{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--+{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--+{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--+{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--+{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--+{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--+{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--+{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--+{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--+{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--+{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--+{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--+{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--+{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--+{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--+{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--+{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..6163657 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..7d0f5dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f11d588 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_182749-paufev36
--\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/output.log b/wandb/run-20220409_182749-paufev36/files/output.log
-deleted file mode 100644
-index 8a30e30..0000000
---- a/wandb/run-20220409_182749-paufev36/files/output.log
-+++ /dev/null
-@@ -1,55 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.115720272064209, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 202.97476196289062, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 151.204345703125, "time": 62}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Exception in thread Thread-16:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220409_182749-paufev36/files/requirements.txt b/wandb/run-20220409_182749-paufev36/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_182749-paufev36/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json b/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-deleted file mode 100644
-index ee6c1fa..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:57:50.039943",
--    "startedAt": "2022-04-09T12:57:49.399103",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json b/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-deleted file mode 100644
-index 6be8521..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 287.689208984375, "_runtime": 137, "_timestamp": 1649509206, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log b/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-deleted file mode 100644
-index ade12de..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-+++ /dev/null
-@@ -1,141 +0,0 @@
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,431 DEBUG   MainThread:25755 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send():179] send: header
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,435 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:49,589 DEBUG   SenderThread:25755 [sender.py:send():179] send: run
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:50,037 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():39] meta init
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():53] meta init done
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:probe():210] probe
--2022-04-09 18:27:50,045 DEBUG   HandlerThread:25755 [meta.py:_setup_git():200] setup git
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_save_code():89] save code
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_code():110] save code done
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_patches():127] save patches
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_pip():57] save pip
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_conda():78] save conda
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:probe():252] probe done
--2022-04-09 18:27:51,519 DEBUG   SenderThread:25755 [sender.py:send():179] send: files
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,530 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:51,530 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:51,872 DEBUG   SenderThread:25755 [sender.py:send():179] send: config
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:06,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:18,996 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,208 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:22,208 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:49,672 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:53,002 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:53,002 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,936 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:00,937 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:08,453 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:08,454 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:20,345 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:23,787 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:23,787 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:39,186 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:39,186 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:51,270 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:54,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:54,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:10,343 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:30:10,343 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug.log b/wandb/run-20220409_182749-paufev36/logs/debug.log
-deleted file mode 100644
-index 7b0f79c..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug.log
-+++ /dev/null
-@@ -1,92 +0,0 @@
--2022-04-09 18:27:49,403 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'paufev36', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-paufev36.yaml', 'start_method': 'thread'}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():418] starting backend
--2022-04-09 18:27:49,427 INFO    MainThread:25755 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:27:49,429 INFO    MainThread:25755 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb b/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
-deleted file mode 100644
-index 70babdb..0000000
-Binary files a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb and /dev/null differ
-diff --git a/wandb/sweep-1t9pc38r/config-paufev36.yaml b/wandb/sweep-1t9pc38r/config-paufev36.yaml
-deleted file mode 100644
-index da3e8b2..0000000
---- a/wandb/sweep-1t9pc38r/config-paufev36.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml b/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-deleted file mode 100644
-index d68afea..0000000
---- a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml b/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-deleted file mode 100644
-index cc3235e..0000000
---- a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml b/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-deleted file mode 100644
-index 24fc0f6..0000000
---- a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml b/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-deleted file mode 100644
-index eeb3936..0000000
---- a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml b/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-deleted file mode 100644
-index f88591e..0000000
---- a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-abict4v2.yaml b/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-deleted file mode 100644
-index 1b97c5e..0000000
---- a/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 20
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml b/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-deleted file mode 100644
-index 426c8ac..0000000
---- a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml b/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-deleted file mode 100644
-index caf5f78..0000000
---- a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml b/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-deleted file mode 100644
-index 6b7d3c1..0000000
---- a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml b/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-deleted file mode 100644
-index 8f11b7e..0000000
---- a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml b/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-deleted file mode 100644
-index d3a2560..0000000
---- a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml b/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-deleted file mode 100644
-index 403014d..0000000
---- a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 512
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml b/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-deleted file mode 100644
-index d1bf3d8..0000000
---- a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 40
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml b/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-deleted file mode 100644
-index 258ae0c..0000000
---- a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml b/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-deleted file mode 100644
-index dbe827a..0000000
---- a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml b/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-deleted file mode 100644
-index 3aeb285..0000000
---- a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml b/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-deleted file mode 100644
-index ccb6734..0000000
---- a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-gjih072d.yaml b/wandb/sweep-yoroy32u/config-gjih072d.yaml
-deleted file mode 100644
-index 73e8e4c..0000000
---- a/wandb/sweep-yoroy32u/config-gjih072d.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml b/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-deleted file mode 100644
-index 9d822c0..0000000
---- a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml b/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-deleted file mode 100644
-index f0bd5df..0000000
---- a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-uh7twoim.yaml b/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-deleted file mode 100644
-index 508d9e2..0000000
---- a/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml b/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-deleted file mode 100644
-index 83311a7..0000000
---- a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml b/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-deleted file mode 100644
-index 4f6dc35..0000000
---- a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--lambd:
--  value: 0.4
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-padai7jf.yaml b/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-deleted file mode 100644
-index 9b19315..0000000
---- a/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--lambd:
--  value: 0.55
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml b/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-deleted file mode 100644
-index 8a8a9b2..0000000
---- a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 256
--epochs:
--  value: 24
--lambd:
--  value: 0.2
--nhead:
--  value: 2
--nlayers:
--  value: 4
diff --git a/wandb/run-20220416_013544-2rw6cucs/files/output.log b/wandb/run-20220416_013544-2rw6cucs/files/output.log
deleted file mode 100644
index 658db0f..0000000
--- a/wandb/run-20220416_013544-2rw6cucs/files/output.log
+++ /dev/null
@@ -1,42 +0,0 @@
-
-train_translation.py --load=0
-Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
-Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
-- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-{"epoch": 0, "step": 0, "loss": 7.128603458404541, "time": 5}
-{"epoch": 0, "step": 5, "loss": 156.04449462890625, "time": 6}
-{"epoch": 0, "step": 10, "loss": 154.7353515625, "time": 7}
-/home/ivlabs/context_enhancement/context_new/new/context_enhancement/train_translation.py:275: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
-  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-translation model saved in checkpoint
-{"epoch": 1, "step": 15, "loss": 138.67442321777344, "time": 73}
-{"epoch": 1, "step": 20, "loss": 75.6456298828125, "time": 74}
-translation model saved in checkpoint
-{"epoch": 2, "step": 25, "loss": 64.19247436523438, "time": 92}
-{"epoch": 2, "step": 30, "loss": 65.62056732177734, "time": 93}
-{"epoch": 2, "step": 35, "loss": 66.36638641357422, "time": 93}
-translation model saved in checkpoint
-{"epoch": 3, "step": 40, "loss": 77.29269409179688, "time": 110}
-{"epoch": 3, "step": 45, "loss": 68.74011993408203, "time": 111}
-translation model saved in checkpoint
-{"epoch": 4, "step": 50, "loss": 74.82659912109375, "time": 131}
-{"epoch": 4, "step": 55, "loss": 77.39452362060547, "time": 132}
-translation model saved in checkpoint
-{"epoch": 5, "step": 60, "loss": 62.27414321899414, "time": 149}
-{"epoch": 5, "step": 65, "loss": 90.9207992553711, "time": 150}
-{"epoch": 5, "step": 70, "loss": 66.96754455566406, "time": 150}
-translation model saved in checkpoint
-{"epoch": 6, "step": 75, "loss": 71.40245819091797, "time": 216}
-{"epoch": 6, "step": 80, "loss": 63.940818786621094, "time": 217}
-translation model saved in checkpoint
-{"epoch": 7, "step": 85, "loss": 50.857147216796875, "time": 233}
-{"epoch": 7, "step": 90, "loss": 78.37335205078125, "time": 234}
-{"epoch": 7, "step": 95, "loss": 100.13611602783203, "time": 234}
-translation model saved in checkpoint
-{"epoch": 8, "step": 100, "loss": 80.35195922851562, "time": 252}
-{"epoch": 8, "step": 105, "loss": 86.00081634521484, "time": 253}
-translation model saved in checkpoint
-{"epoch": 9, "step": 110, "loss": 82.35330200195312, "time": 272}
-{"epoch": 9, "step": 115, "loss": 88.81517791748047, "time": 273}
-translation model saved in checkpoint
\ No newline at end of file
diff --git a/wandb/run-20220416_013544-2rw6cucs/files/requirements.txt b/wandb/run-20220416_013544-2rw6cucs/files/requirements.txt
deleted file mode 100644
index 5ddce70..0000000
--- a/wandb/run-20220416_013544-2rw6cucs/files/requirements.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-aiohttp==3.8.1
-aiosignal==1.2.0
-antlr4-python3-runtime==4.8
-async-timeout==4.0.2
-asynctest==0.13.0
-attrs==21.4.0
-backcall==0.2.0
-bitarray==2.4.1
-blessings==1.7
-brotlipy==0.7.0
-certifi==2021.10.8
-cffi==1.15.0
-charset-normalizer==2.0.12
-click==8.0.4
-colorama==0.4.4
-configparser==5.2.0
-cryptography==36.0.0
-cython==0.29.28
-datasets==1.16.1
-debugpy==1.6.0
-decorator==5.1.1
-dill==0.3.4
-docker-pycreds==0.4.0
-entrypoints==0.4
-fairseq==1.0.0a0
-fastbpe==0.1.0
-filelock==3.6.0
-frozenlist==1.3.0
-fsspec==2022.2.0
-gitdb==4.0.9
-gitpython==3.1.27
-gpustat==0.6.0
-huggingface-hub==0.4.0
-hydra-core==1.0.7
-idna==3.3
-importlib-metadata==4.11.3
-importlib-resources==5.6.0
-ipykernel==6.12.1
-ipython==7.32.0
-jedi==0.18.1
-joblib==1.1.0
-jupyter-client==7.2.2
-jupyter-core==4.9.2
-matplotlib-inline==0.1.3
-mkl-fft==1.3.1
-mkl-random==1.2.2
-mkl-service==2.4.0
-mock==4.0.3
-multidict==6.0.2
-multiprocess==0.70.12.2
-nest-asyncio==1.5.5
-numpy==1.21.5
-nvidia-ml-py3==7.352.0
-omegaconf==2.0.6
-packaging==21.3
-pandas==1.3.5
-parso==0.8.3
-pathtools==0.1.2
-pexpect==4.8.0
-pickleshare==0.7.5
-pillow==9.0.1
-pip==21.2.2
-portalocker==2.4.0
-promise==2.3
-prompt-toolkit==3.0.29
-protobuf==3.19.4
-psutil==5.9.0
-ptyprocess==0.7.0
-pyarrow==7.0.0
-pycparser==2.21
-pygments==2.11.2
-pyopenssl==22.0.0
-pyparsing==3.0.7
-pysocks==1.7.1
-python-dateutil==2.8.2
-pytz==2022.1
-pyyaml==6.0
-pyzmq==22.3.0
-regex==2022.3.15
-requests==2.27.1
-sacrebleu==2.0.0
-sacremoses==0.0.49
-sentry-sdk==1.5.8
-setuptools==58.0.4
-shortuuid==1.0.8
-six==1.16.0
-smmap==5.0.0
-subprocess32==3.5.4
-subword-nmt==0.3.8
-tabulate==0.8.9
-tokenizers==0.10.3
-torch==1.11.0
-torchaudio==0.11.0
-torchtext==0.12.0
-torchvision==0.12.0
-tornado==6.1
-tqdm==4.63.1
-traitlets==5.1.1
-transformers==4.14.1
-typing-extensions==4.1.1
-urllib3==1.26.9
-wandb==0.10.31
-wcwidth==0.2.5
-wheel==0.37.1
-xxhash==3.0.0
-yarl==1.7.2
-zipp==3.7.0
\ No newline at end of file
diff --git a/wandb/run-20220416_013544-2rw6cucs/files/wandb-metadata.json b/wandb/run-20220416_013544-2rw6cucs/files/wandb-metadata.json
deleted file mode 100644
index 9a29c9c..0000000
--- a/wandb/run-20220416_013544-2rw6cucs/files/wandb-metadata.json
+++ /dev/null
@@ -1,26 +0,0 @@
-{
-    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
-    "python": "3.7.11",
-    "heartbeatAt": "2022-04-15T20:05:45.959756",
-    "startedAt": "2022-04-15T20:05:44.728209",
-    "docker": null,
-    "gpu": "NVIDIA GeForce GTX 1080 Ti",
-    "gpu_count": 2,
-    "cpu_count": 8,
-    "cuda": null,
-    "args": [
-        "--load=0"
-    ],
-    "state": "running",
-    "program": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement/train_translation.py",
-    "codePath": "train_translation.py",
-    "git": {
-        "remote": "https://github.com/IvLabs/context_enhancement.git",
-        "commit": "3f7c03274d50f816db3079adcb4d4125620373b6"
-    },
-    "email": "aneeshashetye@gmail.com",
-    "root": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement",
-    "host": "hubble-02",
-    "username": "ivlabs",
-    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
-}
diff --git a/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json b/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
deleted file mode 100644
index c14a271..0000000
--- a/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"epoch_loss": 86.59892717997234, "_runtime": 284, "_timestamp": 1650053428, "_step": 11, "bleu_score": 0.0}
\ No newline at end of file
diff --git a/wandb/run-20220416_013544-2rw6cucs/logs/debug-internal.log b/wandb/run-20220416_013544-2rw6cucs/logs/debug-internal.log
deleted file mode 100644
index e841066..0000000
--- a/wandb/run-20220416_013544-2rw6cucs/logs/debug-internal.log
+++ /dev/null
@@ -1,441 +0,0 @@
-2022-04-16 01:35:44,735 INFO    wandb_internal:4584 [internal.py:wandb_internal():91] W&B internal server running at pid: 4584, started at: 2022-04-16 01:35:44.734800
-2022-04-16 01:35:44,735 INFO    MainThread:4584 [backend.py:ensure_launched():137] started backend process with pid: 0
-2022-04-16 01:35:44,736 INFO    MainThread:4584 [wandb_init.py:init():423] backend started and connected
-2022-04-16 01:35:44,737 DEBUG   MainThread:4584 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
-2022-04-16 01:35:44,738 INFO    MainThread:4584 [wandb_init.py:init():465] updated telemetry
-2022-04-16 01:35:44,738 INFO    MainThread:4584 [wandb_init.py:init():484] communicating current version
-2022-04-16 01:35:44,739 DEBUG   SenderThread:4584 [sender.py:send():179] send: header
-2022-04-16 01:35:44,739 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: check_version
-2022-04-16 01:35:44,741 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: check_version
-2022-04-16 01:35:44,740 INFO    WriterThread:4584 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/run-2rw6cucs.wandb
-2022-04-16 01:35:45,091 INFO    MainThread:4584 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-16 01:35:45,091 INFO    MainThread:4584 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-16 01:35:45,095 DEBUG   SenderThread:4584 [sender.py:send():179] send: run
-2022-04-16 01:35:45,945 INFO    MainThread:4584 [wandb_init.py:init():522] starting run threads in backend
-2022-04-16 01:35:45,948 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: run_start
-2022-04-16 01:35:45,951 INFO    SenderThread:4584 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files
-2022-04-16 01:35:45,951 INFO    SenderThread:4584 [sender.py:_start_run_threads():707] run started: 2rw6cucs with start time 1650053144
-2022-04-16 01:35:45,952 DEBUG   SenderThread:4584 [sender.py:send():179] send: summary
-2022-04-16 01:35:45,952 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:35:45,959 DEBUG   HandlerThread:4584 [meta.py:__init__():39] meta init
-2022-04-16 01:35:45,959 DEBUG   HandlerThread:4584 [meta.py:__init__():53] meta init done
-2022-04-16 01:35:45,959 DEBUG   HandlerThread:4584 [meta.py:probe():210] probe
-2022-04-16 01:35:45,968 DEBUG   HandlerThread:4584 [meta.py:_setup_git():200] setup git
-2022-04-16 01:35:46,021 DEBUG   HandlerThread:4584 [meta.py:_setup_git():207] setup git done
-2022-04-16 01:35:46,022 DEBUG   HandlerThread:4584 [meta.py:_save_code():89] save code
-2022-04-16 01:35:46,039 DEBUG   HandlerThread:4584 [meta.py:_save_code():110] save code done
-2022-04-16 01:35:46,039 DEBUG   HandlerThread:4584 [meta.py:_save_patches():127] save patches
-2022-04-16 01:35:46,144 DEBUG   HandlerThread:4584 [meta.py:_save_patches():169] save patches done
-2022-04-16 01:35:46,145 DEBUG   HandlerThread:4584 [meta.py:_save_pip():57] save pip
-2022-04-16 01:35:46,145 DEBUG   HandlerThread:4584 [meta.py:_save_pip():71] save pip done
-2022-04-16 01:35:46,145 DEBUG   HandlerThread:4584 [meta.py:_save_conda():78] save conda
-2022-04-16 01:35:46,952 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:35:46,952 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/conda-environment.yaml
-2022-04-16 01:35:46,952 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/diff.patch
-2022-04-16 01:35:46,952 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/code/train_translation.py
-2022-04-16 01:35:46,952 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/requirements.txt
-2022-04-16 01:35:46,952 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/code
-2022-04-16 01:35:47,657 DEBUG   HandlerThread:4584 [meta.py:_save_conda():86] save conda done
-2022-04-16 01:35:47,657 DEBUG   HandlerThread:4584 [meta.py:probe():252] probe done
-2022-04-16 01:35:47,659 DEBUG   SenderThread:4584 [sender.py:send():179] send: files
-2022-04-16 01:35:47,660 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-16 01:35:47,661 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-16 01:35:47,661 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-16 01:35:47,668 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:35:47,668 INFO    MainThread:4584 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-16 01:35:47,669 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:35:47,670 INFO    MainThread:4584 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-16 01:35:47,670 INFO    MainThread:4584 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-16 01:35:47,671 INFO    MainThread:4584 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-16 01:35:47,671 INFO    MainThread:4584 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-16 01:35:47,671 INFO    MainThread:4584 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 10, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-16 01:35:47,951 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/conda-environment.yaml
-2022-04-16 01:35:47,951 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-metadata.json
-2022-04-16 01:35:47,951 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:35:48,344 DEBUG   SenderThread:4584 [sender.py:send():179] send: config
-2022-04-16 01:35:49,366 INFO    Thread-14 :4584 [upload_job.py:push():133] Uploaded file /tmp/tmp43zrqffgwandb/2pht4hd1-wandb-metadata.json
-2022-04-16 01:35:49,466 INFO    Thread-16 :4584 [upload_job.py:push():133] Uploaded file /tmp/tmp43zrqffgwandb/1v7xd8v7-code/train_translation.py
-2022-04-16 01:35:49,953 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:35:50,313 INFO    Thread-22 :4584 [upload_job.py:push():133] Uploaded file /tmp/tmp43zrqffgwandb/2zhfst8q-diff.patch
-2022-04-16 01:35:50,953 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/config.yaml
-2022-04-16 01:35:51,953 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:35:53,954 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:36:01,747 DEBUG   SenderThread:4584 [sender.py:send():179] send: history
-2022-04-16 01:36:01,747 DEBUG   SenderThread:4584 [sender.py:send():179] send: summary
-2022-04-16 01:36:01,747 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:36:01,956 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:36:01,957 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:36:03,346 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:36:03,347 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:36:13,960 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:36:14,917 DEBUG   SenderThread:4584 [sender.py:send():179] send: stats
-2022-04-16 01:36:19,013 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:36:19,014 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:36:34,658 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:36:34,658 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:36:45,432 DEBUG   SenderThread:4584 [sender.py:send():179] send: stats
-2022-04-16 01:36:50,310 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:36:50,310 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:37:02,753 DEBUG   SenderThread:4584 [sender.py:send():179] send: history
-2022-04-16 01:37:02,753 DEBUG   SenderThread:4584 [sender.py:send():179] send: summary
-2022-04-16 01:37:02,754 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:37:02,975 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:37:05,982 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:37:05,982 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:37:09,307 DEBUG   SenderThread:4584 [sender.py:send():179] send: history
-2022-04-16 01:37:09,307 DEBUG   SenderThread:4584 [sender.py:send():179] send: summary
-2022-04-16 01:37:09,307 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:37:09,982 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:37:09,982 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:37:16,103 DEBUG   SenderThread:4584 [sender.py:send():179] send: stats
-2022-04-16 01:37:21,651 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:37:21,651 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:37:23,988 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:37:27,989 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:37:28,464 DEBUG   SenderThread:4584 [sender.py:send():179] send: history
-2022-04-16 01:37:28,464 DEBUG   SenderThread:4584 [sender.py:send():179] send: summary
-2022-04-16 01:37:28,465 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:37:28,992 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:37:29,992 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:37:37,481 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:37:37,481 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:37:42,029 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:37:46,029 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:37:46,461 DEBUG   SenderThread:4584 [sender.py:send():179] send: history
-2022-04-16 01:37:46,461 DEBUG   SenderThread:4584 [sender.py:send():179] send: summary
-2022-04-16 01:37:46,462 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:37:46,663 DEBUG   SenderThread:4584 [sender.py:send():179] send: stats
-2022-04-16 01:37:47,033 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:37:48,033 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:37:53,201 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:37:53,201 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:38:02,037 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:38:06,038 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:38:07,261 DEBUG   SenderThread:4584 [sender.py:send():179] send: history
-2022-04-16 01:38:07,261 DEBUG   SenderThread:4584 [sender.py:send():179] send: summary
-2022-04-16 01:38:07,262 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:38:08,288 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:38:08,364 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:38:08,927 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:38:08,927 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:38:17,417 DEBUG   SenderThread:4584 [sender.py:send():179] send: stats
-2022-04-16 01:38:20,291 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:38:24,293 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:38:24,597 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:38:24,597 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:38:25,471 DEBUG   SenderThread:4584 [sender.py:send():179] send: history
-2022-04-16 01:38:25,471 DEBUG   SenderThread:4584 [sender.py:send():179] send: summary
-2022-04-16 01:38:25,471 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:38:26,500 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:38:26,500 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:38:40,265 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:38:40,266 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:38:40,504 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:38:48,033 DEBUG   SenderThread:4584 [sender.py:send():179] send: stats
-2022-04-16 01:38:55,936 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:38:55,936 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:39:11,586 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:39:11,586 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:39:18,577 DEBUG   SenderThread:4584 [sender.py:send():179] send: stats
-2022-04-16 01:39:25,381 DEBUG   SenderThread:4584 [sender.py:send():179] send: history
-2022-04-16 01:39:25,381 DEBUG   SenderThread:4584 [sender.py:send():179] send: summary
-2022-04-16 01:39:25,384 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:39:25,519 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:39:27,259 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:39:27,259 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:39:32,019 DEBUG   SenderThread:4584 [sender.py:send():179] send: history
-2022-04-16 01:39:32,019 DEBUG   SenderThread:4584 [sender.py:send():179] send: summary
-2022-04-16 01:39:32,020 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:39:32,545 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:39:32,545 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:39:43,051 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:39:43,052 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:39:44,548 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:39:48,550 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:39:49,332 DEBUG   SenderThread:4584 [sender.py:send():179] send: stats
-2022-04-16 01:39:49,589 DEBUG   SenderThread:4584 [sender.py:send():179] send: history
-2022-04-16 01:39:49,589 DEBUG   SenderThread:4584 [sender.py:send():179] send: summary
-2022-04-16 01:39:49,589 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:39:50,604 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:39:50,605 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:39:58,737 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:39:58,738 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:40:04,608 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:40:08,350 DEBUG   SenderThread:4584 [sender.py:send():179] send: history
-2022-04-16 01:40:08,350 DEBUG   SenderThread:4584 [sender.py:send():179] send: summary
-2022-04-16 01:40:08,350 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:40:08,610 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:40:08,610 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:40:14,447 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:40:14,447 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:40:20,107 DEBUG   SenderThread:4584 [sender.py:send():179] send: stats
-2022-04-16 01:40:24,614 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:40:28,328 DEBUG   SenderThread:4584 [sender.py:send():179] send: history
-2022-04-16 01:40:28,328 DEBUG   SenderThread:4584 [sender.py:send():179] send: summary
-2022-04-16 01:40:28,328 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:40:28,621 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:40:28,621 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:40:30,122 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:40:30,122 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:40:42,389 INFO    MainThread:4584 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2rw6cucs
-2022-04-16 01:40:42,390 INFO    MainThread:4584 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
-2022-04-16 01:40:42,391 INFO    MainThread:4584 [wandb_run.py:_restore():1480] restore
-2022-04-16 01:40:43,356 DEBUG   SenderThread:4584 [sender.py:send():179] send: telemetry
-2022-04-16 01:40:43,357 DEBUG   SenderThread:4584 [sender.py:send():179] send: exit
-2022-04-16 01:40:43,357 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:40:43,358 INFO    SenderThread:4584 [sender.py:send_exit():287] handling exit code: 0
-2022-04-16 01:40:43,358 INFO    SenderThread:4584 [sender.py:send_exit():295] send defer
-2022-04-16 01:40:43,359 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:40:43,360 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:40:43,361 INFO    HandlerThread:4584 [handler.py:handle_request_defer():141] handle defer: 0
-2022-04-16 01:40:43,361 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:40:43,361 INFO    SenderThread:4584 [sender.py:send_request_defer():304] handle sender defer: 0
-2022-04-16 01:40:43,362 INFO    SenderThread:4584 [sender.py:send_request_defer():342] send defer: 1
-2022-04-16 01:40:43,363 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:40:43,363 INFO    HandlerThread:4584 [handler.py:handle_request_defer():141] handle defer: 1
-2022-04-16 01:40:43,363 INFO    MainThread:4584 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 2
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1745897
-  total_bytes: 1745897
-}
-
-2022-04-16 01:40:43,436 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:40:43,436 INFO    SenderThread:4584 [sender.py:send_request_defer():304] handle sender defer: 1
-2022-04-16 01:40:43,436 INFO    SenderThread:4584 [sender.py:send_request_defer():342] send defer: 2
-2022-04-16 01:40:43,437 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:40:43,437 DEBUG   SenderThread:4584 [sender.py:send():179] send: stats
-2022-04-16 01:40:43,437 INFO    HandlerThread:4584 [handler.py:handle_request_defer():141] handle defer: 2
-2022-04-16 01:40:43,437 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:40:43,437 INFO    SenderThread:4584 [sender.py:send_request_defer():304] handle sender defer: 2
-2022-04-16 01:40:43,437 INFO    SenderThread:4584 [sender.py:send_request_defer():342] send defer: 3
-2022-04-16 01:40:43,438 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:40:43,438 INFO    HandlerThread:4584 [handler.py:handle_request_defer():141] handle defer: 3
-2022-04-16 01:40:43,438 DEBUG   SenderThread:4584 [sender.py:send():179] send: summary
-2022-04-16 01:40:43,438 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:40:43,439 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:40:43,439 INFO    SenderThread:4584 [sender.py:send_request_defer():304] handle sender defer: 3
-2022-04-16 01:40:43,439 INFO    SenderThread:4584 [sender.py:send_request_defer():342] send defer: 4
-2022-04-16 01:40:43,439 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:40:43,439 INFO    HandlerThread:4584 [handler.py:handle_request_defer():141] handle defer: 4
-2022-04-16 01:40:43,439 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:40:43,439 INFO    SenderThread:4584 [sender.py:send_request_defer():304] handle sender defer: 4
-2022-04-16 01:40:43,465 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:40:43,631 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:40:43,632 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:40:44,194 INFO    SenderThread:4584 [sender.py:send_request_defer():342] send defer: 5
-2022-04-16 01:40:44,194 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:40:44,196 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:40:44,196 INFO    HandlerThread:4584 [handler.py:handle_request_defer():141] handle defer: 5
-2022-04-16 01:40:44,196 INFO    MainThread:4584 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 2
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1745897
-  total_bytes: 1745897
-}
-
-2022-04-16 01:40:44,197 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:40:44,197 INFO    SenderThread:4584 [sender.py:send_request_defer():304] handle sender defer: 5
-2022-04-16 01:40:44,198 INFO    SenderThread:4584 [dir_watcher.py:finish():282] shutting down directory watcher
-2022-04-16 01:40:44,298 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:40:44,632 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/config.yaml
-2022-04-16 01:40:44,634 INFO    SenderThread:4584 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files
-2022-04-16 01:40:44,634 INFO    SenderThread:4584 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/requirements.txt requirements.txt
-2022-04-16 01:40:44,635 INFO    SenderThread:4584 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-metadata.json wandb-metadata.json
-2022-04-16 01:40:44,635 INFO    SenderThread:4584 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log output.log
-2022-04-16 01:40:44,642 INFO    SenderThread:4584 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/conda-environment.yaml conda-environment.yaml
-2022-04-16 01:40:44,644 INFO    SenderThread:4584 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json wandb-summary.json
-2022-04-16 01:40:44,644 INFO    SenderThread:4584 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/config.yaml config.yaml
-2022-04-16 01:40:44,644 INFO    SenderThread:4584 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/diff.patch diff.patch
-2022-04-16 01:40:44,646 INFO    SenderThread:4584 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/code/train_translation.py code/train_translation.py
-2022-04-16 01:40:44,646 INFO    SenderThread:4584 [sender.py:send_request_defer():342] send defer: 6
-2022-04-16 01:40:44,647 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:40:44,647 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:40:44,647 INFO    HandlerThread:4584 [handler.py:handle_request_defer():141] handle defer: 6
-2022-04-16 01:40:44,649 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:40:44,649 INFO    SenderThread:4584 [sender.py:send_request_defer():304] handle sender defer: 6
-2022-04-16 01:40:44,649 INFO    SenderThread:4584 [file_pusher.py:finish():176] shutting down file pusher
-2022-04-16 01:40:44,649 INFO    SenderThread:4584 [sender.py:send_request_defer():342] send defer: 7
-2022-04-16 01:40:44,651 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:40:44,651 INFO    HandlerThread:4584 [handler.py:handle_request_defer():141] handle defer: 7
-2022-04-16 01:40:44,651 INFO    MainThread:4584 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1745897
-  total_bytes: 1756683
-}
-
-2022-04-16 01:40:44,651 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:40:44,652 INFO    SenderThread:4584 [sender.py:send_request_defer():304] handle sender defer: 7
-2022-04-16 01:40:44,753 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:40:45,419 INFO    SenderThread:4584 [sender.py:send_request_defer():342] send defer: 8
-2022-04-16 01:40:45,420 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:40:45,421 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:40:45,421 INFO    HandlerThread:4584 [handler.py:handle_request_defer():141] handle defer: 8
-2022-04-16 01:40:45,422 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:40:45,422 INFO    MainThread:4584 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1745897
-  total_bytes: 1756683
-}
-
-2022-04-16 01:40:45,423 INFO    SenderThread:4584 [sender.py:send_request_defer():304] handle sender defer: 8
-2022-04-16 01:40:45,424 INFO    SenderThread:4584 [sender.py:send_request_defer():342] send defer: 9
-2022-04-16 01:40:45,426 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: defer
-2022-04-16 01:40:45,426 INFO    HandlerThread:4584 [handler.py:handle_request_defer():141] handle defer: 9
-2022-04-16 01:40:45,426 DEBUG   SenderThread:4584 [sender.py:send():179] send: final
-2022-04-16 01:40:45,427 DEBUG   SenderThread:4584 [sender.py:send():179] send: footer
-2022-04-16 01:40:45,427 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: defer
-2022-04-16 01:40:45,427 INFO    SenderThread:4584 [sender.py:send_request_defer():304] handle sender defer: 9
-2022-04-16 01:40:45,524 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:40:45,525 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:40:45,526 INFO    MainThread:4584 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1745897
-  total_bytes: 1756683
-}
-
-2022-04-16 01:40:45,627 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:40:45,628 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:40:45,629 INFO    MainThread:4584 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1750664
-  total_bytes: 1756683
-}
-
-2022-04-16 01:40:45,730 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:40:45,731 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:40:45,732 INFO    MainThread:4584 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1756683
-  total_bytes: 1756683
-}
-
-2022-04-16 01:40:45,834 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:40:45,835 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:40:45,836 INFO    MainThread:4584 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1756683
-  total_bytes: 1756683
-}
-
-2022-04-16 01:40:45,938 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:40:45,940 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:40:45,942 INFO    MainThread:4584 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1756683
-  total_bytes: 1756683
-}
-
-2022-04-16 01:40:46,043 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:40:46,044 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:40:46,045 INFO    MainThread:4584 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1756683
-  total_bytes: 1756683
-}
-
-2022-04-16 01:40:46,147 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:40:46,148 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:40:46,149 INFO    MainThread:4584 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1756683
-  total_bytes: 1756683
-}
-
-2022-04-16 01:40:46,218 INFO    Thread-35 :4584 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/requirements.txt
-2022-04-16 01:40:46,227 INFO    Thread-37 :4584 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/conda-environment.yaml
-2022-04-16 01:40:46,246 INFO    Thread-36 :4584 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:40:46,250 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:40:46,253 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:40:46,255 INFO    Thread-38 :4584 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:40:46,257 INFO    MainThread:4584 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1756683
-  total_bytes: 1756683
-}
-
-2022-04-16 01:40:46,272 INFO    Thread-39 :4584 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/config.yaml
-2022-04-16 01:40:46,358 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:40:46,359 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:40:46,360 INFO    MainThread:4584 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1756683
-  total_bytes: 1756683
-}
-
-2022-04-16 01:40:46,462 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:40:46,462 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:40:46,464 INFO    MainThread:4584 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1756683
-  total_bytes: 1756683
-}
-
-2022-04-16 01:40:46,565 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: poll_exit
-2022-04-16 01:40:46,566 DEBUG   SenderThread:4584 [sender.py:send_request():193] send_request: poll_exit
-2022-04-16 01:40:46,566 INFO    SenderThread:4584 [file_pusher.py:join():181] waiting for file pusher
-2022-04-16 01:40:46,567 INFO    MainThread:4584 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
-exit_result {
-}
-file_counts {
-  wandb_count: 7
-  other_count: 1
-}
-pusher_stats {
-  uploaded_bytes: 1756683
-  total_bytes: 1756683
-}
-
-2022-04-16 01:40:46,569 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: get_summary
-2022-04-16 01:40:46,571 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: sampled_history
-2022-04-16 01:40:46,575 DEBUG   HandlerThread:4584 [handler.py:handle_request():124] handle_request: shutdown
-2022-04-16 01:40:46,575 INFO    HandlerThread:4584 [handler.py:finish():638] shutting down handler
-2022-04-16 01:40:47,428 INFO    WriterThread:4584 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/run-2rw6cucs.wandb
-2022-04-16 01:40:47,567 INFO    SenderThread:4584 [sender.py:finish():933] shutting down sender
-2022-04-16 01:40:47,567 INFO    SenderThread:4584 [file_pusher.py:finish():176] shutting down file pusher
-2022-04-16 01:40:47,567 INFO    SenderThread:4584 [file_pusher.py:join():181] waiting for file pusher
-2022-04-16 01:40:47,579 INFO    MainThread:4584 [wandb_run.py:_show_summary():1785] rendering summary
-2022-04-16 01:40:47,579 INFO    MainThread:4584 [wandb_run.py:_show_history():1823] rendering history
-2022-04-16 01:40:47,580 INFO    MainThread:4584 [wandb_run.py:_show_files():1852] logging synced files
-2022-04-16 01:40:47,627 INFO    MainThread:4584 [internal.py:handle_exit():78] Internal process exited
diff --git a/wandb/run-20220416_013544-2rw6cucs/logs/debug.log b/wandb/run-20220416_013544-2rw6cucs/logs/debug.log
deleted file mode 100644
index e2cfa8d..0000000
--- a/wandb/run-20220416_013544-2rw6cucs/logs/debug.log
+++ /dev/null
@@ -1,96 +0,0 @@
-2022-04-16 01:35:44,729 INFO    MainThread:4584 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
-2022-04-16 01:35:44,729 INFO    MainThread:4584 [wandb_setup.py:_flush():69] setting login settings: {}
-2022-04-16 01:35:44,729 INFO    MainThread:4584 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/logs/debug.log
-2022-04-16 01:35:44,729 INFO    MainThread:4584 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/logs/debug-internal.log
-2022-04-16 01:35:44,729 INFO    MainThread:4584 [wandb_init.py:init():369] calling init triggers
-2022-04-16 01:35:44,729 INFO    MainThread:4584 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
-config: {'workers': 4, 'epochs': 10, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-16 01:35:44,729 INFO    MainThread:4584 [wandb_init.py:init():418] starting backend
-2022-04-16 01:35:44,734 INFO    MainThread:4584 [backend.py:ensure_launched():132] starting backend process...
-2022-04-16 01:35:44,735 INFO    wandb_internal:4584 [internal.py:wandb_internal():91] W&B internal server running at pid: 4584, started at: 2022-04-16 01:35:44.734800
-2022-04-16 01:35:44,735 INFO    MainThread:4584 [backend.py:ensure_launched():137] started backend process with pid: 0
-2022-04-16 01:35:44,736 INFO    MainThread:4584 [wandb_init.py:init():423] backend started and connected
-2022-04-16 01:35:44,738 INFO    MainThread:4584 [wandb_init.py:init():465] updated telemetry
-2022-04-16 01:35:44,738 INFO    MainThread:4584 [wandb_init.py:init():484] communicating current version
-2022-04-16 01:35:44,740 INFO    WriterThread:4584 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/run-2rw6cucs.wandb
-2022-04-16 01:35:45,091 INFO    MainThread:4584 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-16 01:35:45,091 INFO    MainThread:4584 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-16 01:35:45,945 INFO    MainThread:4584 [wandb_init.py:init():522] starting run threads in backend
-2022-04-16 01:35:45,951 INFO    SenderThread:4584 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files
-2022-04-16 01:35:45,951 INFO    SenderThread:4584 [sender.py:_start_run_threads():707] run started: 2rw6cucs with start time 1650053144
-2022-04-16 01:35:45,952 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:35:46,952 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:35:46,952 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/conda-environment.yaml
-2022-04-16 01:35:46,952 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/diff.patch
-2022-04-16 01:35:46,952 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/code/train_translation.py
-2022-04-16 01:35:46,952 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/requirements.txt
-2022-04-16 01:35:46,952 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/code
-2022-04-16 01:35:47,660 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-16 01:35:47,661 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-16 01:35:47,661 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-16 01:35:47,668 INFO    MainThread:4584 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-16 01:35:47,670 INFO    MainThread:4584 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-16 01:35:47,670 INFO    MainThread:4584 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-16 01:35:47,671 INFO    MainThread:4584 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-16 01:35:47,671 INFO    MainThread:4584 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-16 01:35:47,671 INFO    MainThread:4584 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 10, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-16 01:35:47,951 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/conda-environment.yaml
-2022-04-16 01:35:47,951 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-metadata.json
-2022-04-16 01:35:47,951 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:35:49,366 INFO    Thread-14 :4584 [upload_job.py:push():133] Uploaded file /tmp/tmp43zrqffgwandb/2pht4hd1-wandb-metadata.json
-2022-04-16 01:35:49,466 INFO    Thread-16 :4584 [upload_job.py:push():133] Uploaded file /tmp/tmp43zrqffgwandb/1v7xd8v7-code/train_translation.py
-2022-04-16 01:35:49,953 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:35:50,313 INFO    Thread-22 :4584 [upload_job.py:push():133] Uploaded file /tmp/tmp43zrqffgwandb/2zhfst8q-diff.patch
-2022-04-16 01:35:50,953 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/config.yaml
-2022-04-16 01:35:51,953 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:35:53,954 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:36:01,747 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:36:01,956 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:36:01,957 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:36:13,960 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:37:02,754 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:37:02,975 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:37:09,307 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:37:09,982 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:37:09,982 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:37:23,988 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:37:27,989 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:37:28,465 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:37:28,992 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:37:29,992 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:37:42,029 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:37:46,029 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:37:46,462 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:37:47,033 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:37:48,033 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:38:02,037 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:38:06,038 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:38:07,262 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:38:08,288 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:38:08,364 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:38:20,291 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:38:24,293 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:38:25,471 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:38:26,500 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:38:26,500 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:38:40,504 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:39:25,384 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:39:25,519 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:39:32,020 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:39:32,545 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:39:32,545 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:39:44,548 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:39:48,550 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:39:49,589 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:39:50,604 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:39:50,605 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:40:04,608 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:40:08,350 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:40:08,610 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:40:08,610 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:40:24,614 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:40:28,328 INFO    SenderThread:4584 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:40:28,621 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/wandb-summary.json
-2022-04-16 01:40:28,621 INFO    Thread-11 :4584 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_013544-2rw6cucs/files/output.log
-2022-04-16 01:40:42,389 INFO    MainThread:4584 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2rw6cucs
diff --git a/wandb/run-20220416_013544-2rw6cucs/run-2rw6cucs.wandb b/wandb/run-20220416_013544-2rw6cucs/run-2rw6cucs.wandb
deleted file mode 100644
index f34d5f1..0000000
Binary files a/wandb/run-20220416_013544-2rw6cucs/run-2rw6cucs.wandb and /dev/null differ
diff --git a/wandb/run-20220416_014133-qw6te5do/files/code/train_translation.py b/wandb/run-20220416_014133-qw6te5do/files/code/train_translation.py
deleted file mode 100644
index 245e045..0000000
--- a/wandb/run-20220416_014133-qw6te5do/files/code/train_translation.py
+++ /dev/null
@@ -1,405 +0,0 @@
-import numpy as np
-from pathlib import Path
-import argparse
-import json
-import math
-import os
-import random
-import signal
-import subprocess
-import sys
-import time
-
-import torch
-from torch import nn, optim 
-from torch.nn import Transformer 
-import torchtext
-import t_dataset
-from t_dataset import  Translation_dataset_t
-from t_dataset import  MyCollate
-import translation_utils 
-from translation_utils import TokenEmbedding, PositionalEncoding 
-from translation_utils import create_mask
-from transformers import BertModel 
-from transformers import AutoTokenizer
-from torch import Tensor
-from torchtext.data.metrics import bleu_score
-from models import Translator
-from models import BarlowTwins
-
-import wandb 
-
-
-#import barlow
-os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
-os.environ['WANDB_START_METHOD'] = 'thread'
-os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-
-MANUAL_SEED = 4444
-
-random.seed(MANUAL_SEED)
-np.random.seed(MANUAL_SEED)
-torch.manual_seed(MANUAL_SEED)
-torch.backends.cudnn.deterministic = True
-
-
-parser = argparse.ArgumentParser(description = 'Translation') 
-
-# Training hyper-parameters: 
-parser.add_argument('--workers', default=4, type=int, metavar='N', 
-                    help='number of data loader workers') 
-parser.add_argument('--epochs', default=10, type=int, metavar='N',
-                    help='number of total epochs to run')
-parser.add_argument('--batch_size', default=16, type=int, metavar='n',
-                    help='mini-batch size')
-parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
-                    help='base learning rate')
-parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
-                    help='dropout for training translation transformer')
-parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
-                    help='weight decay')
-parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
-                    help='momentum for sgd')
-parser.add_argument('--clip', default=1, type=float, metavar='GC',
-                    help='Gradient Clipping')
-parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
-                    help='betas for Adam Optimizer')
-parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
-                    help='eps for Adam optimizer')
-parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
-                    help='loss function for translation')
-parser.add_argument('--optimizer', default='adam', type=str, metavar='OP',
-                    help='selecting optimizer')
-
-# Transformer parameters: 
-parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
-                    help='dimension of transformer encoder')
-parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                    help= 'number of heads in transformer') 
-parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                    help= 'dimension of feedforward layer in transformer encoder') 
-parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                   help='number of layers of transformer encoder') 
-parser.add_argument('--projector', default='768-256', type=str,
-                    metavar='MLP', help='projector MLP')
-
-# Tokenizer: 
-parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
-                metavar='T', help= 'tokenizer')
-parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
-                    help='Dimension of mbert output')
-# Paths: 
-parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
-                    metavar='DIR', help='path to checkpoint directory')
-
-# to load or barlow or not: 
-parser.add_argument('--load', default=0, type=int,
-                    metavar='DIR', help='to load barlow twins encoder or not')
-
-# calculate bleu: 
-parser.add_argument('--checkbleu', default=5 , type=int,
-                    metavar='BL', help='check bleu after these number of epochs')
-# train or test dataset
-parser.add_argument('--train', default=True , type=bool,
-                    metavar='T', help='selecting train set')
-
-parser.add_argument('--print_freq', default=5 , type=int,
-                    metavar='PF', help='frequency of printing and saving stats')
-
-parser.add_argument('--test_translation', default=0, type=int, 
-                    metavar='TT', help='testing translation_score')
-''' NOTE: 
-        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-'''
-
-args = parser.parse_args()
-# print(args.load)
-os.environ["TOKENIZERS_PARALLELISM"] = "true"
-
-def main(): 
-
-    # print("entered main")
-    args.ngpus_per_node = torch.cuda.device_count()
-    if 'SLURM_JOB_ID' in os.environ:
-        # single-node and multi-node distributed training on SLURM cluster
-        # requeue job on SLURM preemption
-        signal.signal(signal.SIGUSR1, handle_sigusr1)
-        signal.signal(signal.SIGTERM, handle_sigterm)
-        # find a common host name on all nodes
-        # assume scontrol returns hosts in the same order on all nodes
-        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
-        stdout = subprocess.check_output(cmd.split())
-        host_name = stdout.decode().splitlines()[0]
-        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
-        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
-        args.dist_url = f'tcp://{host_name}:58472'
-    else:
-        # single-node distributed training
-        args.rank = 0
-        args.dist_url = 'tcp://localhost:58472'
-        args.world_size = args.ngpus_per_node
-    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
-
-
-def main_worker(gpu, args):
-    
-    args.rank += gpu
-    torch.distributed.init_process_group(
-        backend='nccl', init_method=args.dist_url,
-        world_size=args.world_size, rank=args.rank)
-
-    if args.rank == 0:
-
-        wandb.init(config=args, project='translation_test')#############################################
-        wandb.config.update(args)
-        config = wandb.config
-    
-        # exit()
-        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
-        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
-        print(' '.join(sys.argv))
-        print(' '.join(sys.argv), file=stats_file)
-
-    torch.cuda.set_device(gpu)
-    torch.backends.cudnn.benchmark = True
-
-    dataset = Translation_dataset_t(train=args.train) 
-    src_vocab_size = dataset.de_vocab_size
-    trg_vocab_size = dataset.en_vocab_size
-    tokenizer = dataset.tokenizer  
-    pad_idx = tokenizer.pad_token_id
-    sos_idx = tokenizer.cls_token_id 
-    eos_idx = tokenizer.sep_token_id
-
-#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
-    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
-    # print(src_vocab_size, trg_vocab_size)
-    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
-    transformer = Transformer(d_model=args.dmodel, 
-                              nhead=args.nhead, 
-                              num_encoder_layers=args.nlayers, 
-                              num_decoder_layers = args.nlayers, 
-                              dim_feedforward=args.dfeedforward, 
-                              dropout=args.dropout)
-    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
-    # print(model.state_dict)
-#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
-
-    # args.load = False
-
-    if args.load == 1: 
-        # print(args.load)
-        # print('inside')
-        print('loading barlow model')
-        t_enc = model.transformer.encoder
-        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
-        ### note: lambd is just a placeholder
-        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
-                            map_location='cpu')
-        barlow.load_state_dict(ckpt['model'])
-        model.transformer.encoder = barlow.transformer_enc        
-        model.mbert = barlow.mbert
-    '''
-    to_do: 
-    if post_train: 
-        torch.load(model.states_dict)
-        model.transformer.encoder = model_barlow
-
-    '''
-#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
-
-    param_weights = []
-    param_biases = []
-    for param in model.parameters():
-        if param.ndim == 1:
-            param_biases.append(param)
-        else:
-            param_weights.append(param)
-    parameters = [{'params': param_weights}, {'params': param_biases}]
-    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
-
-###########################################################
-    if args.optimizer == 'adam':
-        optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
-    else: 
-        optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) 
-    
-    if args.loss_fn == 'cross_entropy': 
-        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
-##############################################################
-
-    start_epoch = 0 
-
-    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
-
-    assert args.batch_size % args.world_size == 0
-    per_device_batch_size = args.batch_size // args.world_size
-    id2bert_dict = dataset.id2bert_dict
-    ###############################
-    loader = torch.utils.data.DataLoader(
-         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-   
-    test_loader = torch.utils.data.DataLoader(
-         dataset, batch_size=1, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-    #############################
-    start_time = time.time()
-
-
-    if not args.test_translation: 
-
-        for epoch in range(start_epoch, args.epochs):
-            sampler.set_epoch(epoch)
-            epoch_loss = 0 
-            t = 0 
-            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
-                src = sent[0].cuda(gpu, non_blocking=True)
-                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
-                tgt_out = sent[3].cuda(gpu, non_blocking=True)
-                
-                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
-                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
-                
-                optimizer.zero_grad()
-
-                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
-                loss.backward()
-
-                optimizer.step()
-                # losses += loss.item()
-                
-#                wandb.log({'iter_loss': loss})
-                epoch_loss += loss.item()
-                t += 1 
-                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-                
-                if step % args.print_freq == 0:
-                    if args.rank == 0:
-                        stats = dict(epoch=epoch, step=step,
-                                    loss=loss.item(),
-                                    time=int(time.time() - start_time))
-                        print(json.dumps(stats))
-                        print(json.dumps(stats), file=stats_file)
-            if args.rank == 0:
-
-                wandb.log({"epoch_loss":epoch_loss/t})
-                # save checkpoint
-                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
-                            optimizer=optimizer.state_dict())
-                # print(model.state_dict)
-                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
-                print('translation model saved in', args.checkpoint_dir)
-            
-    ##############################################################
-            if args.rank == 0: 
-                if epoch%args.checkbleu ==0 : 
-
-                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                    wandb.log({'bleu_score': bleu_score}) 
-    #            print(bleu_score(predicted, target))
-    ##############################################################
-    #        if epoch%1 ==0 : 
-    #            torch.save(model.module.state_dict(),
-    #                   'path.pth')
-    #            print("Model is saved")
-            # if args.rank == 0:
-            #     # save checkpoint
-            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
-            #                  optimizer=optimizer.state_dict())
-            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
-            #     print('saved translation model in', args.checkpoint_dir)
-        wandb.finish()
-            
-    else: 
-
-        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-        print('test_bleu_score', bleu_score)
-        if args.rank == 0: 
-            wandb.log({'bleu_score': bleu_score})
-
-
-def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
-
-    model.eval()
-    predicted=[]
-    target=[]
-            
-    for i in test_loader: 
-        src = i[0].cuda(gpu, non_blocking=True)
-#        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-        tgt_out = i[3].cuda(gpu, non_blocking=True)
-        num_tokens = src.shape[0]
-
-        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
-        out = translate(model, src, tokenizer, src_mask, id2bert_dict, gpu)
-        predicted.append(out)
-        for i in range(len(tgt_out)): 
-            tgt_out[i] = id2bert_dict[tgt_out[i].item()]
-        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-        print('out', out)
-        print('predicted', tgt_out)
-
-                
-        try: 
-            bleu_score(predicted, target)
-        except: 
-            predicted.pop()
-            target.pop()
-        
-            
-        bleu = bleu_score(predicted, target)
-
-    return bleu
-
-'''
-todo: 
-    BLEU score
-'''
-
-# function to generate output sequence using greedy algorithm 
-def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
-    src = src
-    src_mask = src_mask
-
-    memory = model.module.encode(src, src_mask)
-    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
-    for i in range(max_len-1):
-        memory = memory
-        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
-                    .type(torch.bool)).cuda(gpu, non_blocking=True)
-        out = model.module.decode(ys, memory, tgt_mask)
-        out = out.transpose(0, 1)
-        prob = model.module.generator(out[:, -1])
-        _, next_word = torch.max(prob, dim=1)
-        next_word = next_word.item()
-
-        ys = torch.cat([ys,
-                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
-        if next_word == eos_idx:
-            break
-    return ys
-
-
-# actual function to translate input sentence into target language
-def translate(model: torch.nn.Module, 
-        src: torch.tensor, 
-        tokenizer,src_mask, id2bert_dict, gpu):
-    model.eval()
-    
-    num_tokens = src.shape[0]
-    
-    
-    tgt_tokens = greedy_decode(
-        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-    
-    for i in range(len(tgt_tokens)): 
-        tgt_tokens[i] = id2bert_dict[tgt_tokens[i].item()]
-#    print(tgt_tokens)
-
-    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
-
-
-if __name__ == '__main__': 
-    main()
-    wandb.finish()
diff --git a/wandb/run-20220416_014133-qw6te5do/files/conda-environment.yaml b/wandb/run-20220416_014133-qw6te5do/files/conda-environment.yaml
deleted file mode 100644
index fd74d2b..0000000
--- a/wandb/run-20220416_014133-qw6te5do/files/conda-environment.yaml
+++ /dev/null
@@ -1,158 +0,0 @@
-name: ectc
-channels:
-  - pytorch
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=4.5=1_gnu
-  - blas=1.0=mkl
-  - brotlipy=0.7.0=py37h27cfd23_1003
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2022.3.18=h06a4308_0
-  - certifi=2021.10.8=py37h06a4308_2
-  - cffi=1.15.0=py37hd667e15_1
-  - cryptography=36.0.0=py37h9ce1e76_0
-  - cudatoolkit=11.3.1=h2bc3f7f_2
-  - ffmpeg=4.3=hf484d3e_0
-  - freetype=2.11.0=h70c0345_0
-  - giflib=5.2.1=h7b6447c_0
-  - gmp=6.2.1=h2531618_2
-  - gnutls=3.6.15=he1e5248_0
-  - idna=3.3=pyhd3eb1b0_0
-  - intel-openmp=2021.4.0=h06a4308_3561
-  - jpeg=9d=h7f8727e_0
-  - lame=3.100=h7b6447c_0
-  - lcms2=2.12=h3be6417_0
-  - ld_impl_linux-64=2.35.1=h7274673_9
-  - libffi=3.3=he6710b0_2
-  - libgcc-ng=9.3.0=h5101ec6_17
-  - libgomp=9.3.0=h5101ec6_17
-  - libiconv=1.15=h63c8f33_5
-  - libidn2=2.3.2=h7f8727e_0
-  - libpng=1.6.37=hbc83047_0
-  - libstdcxx-ng=9.3.0=hd4cf53a_17
-  - libtasn1=4.16.0=h27cfd23_0
-  - libtiff=4.2.0=h85742a9_0
-  - libunistring=0.9.10=h27cfd23_0
-  - libuv=1.40.0=h7b6447c_0
-  - libwebp=1.2.2=h55f646e_0
-  - libwebp-base=1.2.2=h7f8727e_0
-  - lz4-c=1.9.3=h295c915_1
-  - mkl=2021.4.0=h06a4308_640
-  - mkl-service=2.4.0=py37h7f8727e_0
-  - mkl_fft=1.3.1=py37hd3c417c_0
-  - mkl_random=1.2.2=py37h51133e4_0
-  - ncurses=6.3=h7f8727e_2
-  - nettle=3.7.3=hbbd107a_1
-  - numpy-base=1.21.2=py37h79a1101_0
-  - openh264=2.1.1=h4ff587b_0
-  - openssl=1.1.1n=h7f8727e_0
-  - pip=21.2.2=py37h06a4308_0
-  - pycparser=2.21=pyhd3eb1b0_0
-  - pyopenssl=22.0.0=pyhd3eb1b0_0
-  - pysocks=1.7.1=py37_1
-  - python=3.7.11=h12debd9_0
-  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
-  - pytorch-mutex=1.0=cuda
-  - readline=8.1.2=h7f8727e_1
-  - requests=2.27.1=pyhd3eb1b0_0
-  - setuptools=58.0.4=py37h06a4308_0
-  - six=1.16.0=pyhd3eb1b0_1
-  - sqlite=3.38.0=hc218d9a_0
-  - tk=8.6.11=h1ccaba5_0
-  - torchaudio=0.11.0=py37_cu113
-  - typing_extensions=4.1.1=pyh06a4308_0
-  - wheel=0.37.1=pyhd3eb1b0_0
-  - xz=5.2.5=h7b6447c_0
-  - zlib=1.2.11=h7f8727e_4
-  - zstd=1.4.9=haebb681_0
-  - pip:
-    - aiohttp==3.8.1
-    - aiosignal==1.2.0
-    - antlr4-python3-runtime==4.8
-    - async-timeout==4.0.2
-    - asynctest==0.13.0
-    - attrs==21.4.0
-    - backcall==0.2.0
-    - bitarray==2.4.1
-    - blessings==1.7
-    - charset-normalizer==2.0.12
-    - click==8.0.4
-    - colorama==0.4.4
-    - configparser==5.2.0
-    - cython==0.29.28
-    - datasets==1.16.1
-    - debugpy==1.6.0
-    - decorator==5.1.1
-    - dill==0.3.4
-    - docker-pycreds==0.4.0
-    - entrypoints==0.4
-    - fastbpe==0.1.0
-    - filelock==3.6.0
-    - frozenlist==1.3.0
-    - fsspec==2022.2.0
-    - gitdb==4.0.9
-    - gitpython==3.1.27
-    - gpustat==0.6.0
-    - huggingface-hub==0.4.0
-    - hydra-core==1.0.7
-    - importlib-metadata==4.11.3
-    - importlib-resources==5.6.0
-    - ipykernel==6.12.1
-    - ipython==7.32.0
-    - jedi==0.18.1
-    - joblib==1.1.0
-    - jupyter-client==7.2.2
-    - jupyter-core==4.9.2
-    - matplotlib-inline==0.1.3
-    - mock==4.0.3
-    - multidict==6.0.2
-    - multiprocess==0.70.12.2
-    - nest-asyncio==1.5.5
-    - numpy==1.21.5
-    - nvidia-ml-py3==7.352.0
-    - omegaconf==2.0.6
-    - packaging==21.3
-    - pandas==1.3.5
-    - parso==0.8.3
-    - pathtools==0.1.2
-    - pexpect==4.8.0
-    - pickleshare==0.7.5
-    - pillow==9.0.1
-    - portalocker==2.4.0
-    - promise==2.3
-    - prompt-toolkit==3.0.29
-    - protobuf==3.19.4
-    - psutil==5.9.0
-    - ptyprocess==0.7.0
-    - pyarrow==7.0.0
-    - pygments==2.11.2
-    - pyparsing==3.0.7
-    - python-dateutil==2.8.2
-    - pytz==2022.1
-    - pyyaml==6.0
-    - pyzmq==22.3.0
-    - regex==2022.3.15
-    - sacrebleu==2.0.0
-    - sacremoses==0.0.49
-    - sentry-sdk==1.5.8
-    - shortuuid==1.0.8
-    - smmap==5.0.0
-    - subprocess32==3.5.4
-    - subword-nmt==0.3.8
-    - tabulate==0.8.9
-    - tokenizers==0.10.3
-    - torch==1.11.0
-    - torchtext==0.12.0
-    - torchvision==0.9.1
-    - tornado==6.1
-    - tqdm==4.63.1
-    - traitlets==5.1.1
-    - transformers==4.14.1
-    - urllib3==1.26.9
-    - wandb==0.10.31
-    - wcwidth==0.2.5
-    - xxhash==3.0.0
-    - yarl==1.7.2
-    - zipp==3.7.0
-prefix: /home/ivlabs/miniconda3/envs/ectc
diff --git a/wandb/run-20220416_014133-qw6te5do/files/config.yaml b/wandb/run-20220416_014133-qw6te5do/files/config.yaml
deleted file mode 100644
index 52b4100..0000000
--- a/wandb/run-20220416_014133-qw6te5do/files/config.yaml
+++ /dev/null
@@ -1,110 +0,0 @@
-wandb_version: 1
-
-_wandb:
-  desc: null
-  value:
-    cli_version: 0.10.31
-    code_path: code/train_translation.py
-    framework: huggingface
-    huggingface_version: 4.14.1
-    is_jupyter_run: false
-    is_kaggle_kernel: false
-    python_version: 3.7.11
-    t:
-      1:
-      - 1
-      - 11
-      4: 3.7.11
-      5: 0.10.31
-      6: 4.14.1
-      8:
-      - 8
-batch_size:
-  desc: null
-  value: 16
-betas:
-  desc: null
-  value:
-  - 0.9
-  - 0.98
-checkbleu:
-  desc: null
-  value: 5
-checkpoint_dir:
-  desc: null
-  value: checkpoint
-clip:
-  desc: null
-  value: 1
-dfeedforward:
-  desc: null
-  value: 200
-dist_url:
-  desc: null
-  value: tcp://localhost:58472
-dmodel:
-  desc: null
-  value: 768
-dropout:
-  desc: null
-  value: 0.01
-epochs:
-  desc: null
-  value: 10
-eps:
-  desc: null
-  value: 1.0e-09
-learning_rate:
-  desc: null
-  value: 0.2
-load:
-  desc: null
-  value: 0
-loss_fn:
-  desc: null
-  value: cross_entropy
-mbert_out_size:
-  desc: null
-  value: 768
-momentum:
-  desc: null
-  value: 0.9
-ngpus_per_node:
-  desc: null
-  value: 2
-nhead:
-  desc: null
-  value: 4
-nlayers:
-  desc: null
-  value: 3
-optimizer:
-  desc: null
-  value: adam
-print_freq:
-  desc: null
-  value: 5
-projector:
-  desc: null
-  value: 768-256
-rank:
-  desc: null
-  value: 0
-test_translation:
-  desc: null
-  value: 0
-tokenizer:
-  desc: null
-  value: bert-base-multilingual-uncased
-train:
-  desc: null
-  value: true
-weight_decay:
-  desc: null
-  value: 1.0e-06
-workers:
-  desc: null
-  value: 4
-world_size:
-  desc: null
-  value: 2
diff --git a/wandb/run-20220416_014133-qw6te5do/files/diff.patch b/wandb/run-20220416_014133-qw6te5do/files/diff.patch
deleted file mode 100644
index 290700b..0000000
--- a/wandb/run-20220416_014133-qw6te5do/files/diff.patch
+++ /dev/null
@@ -1,30813 +0,0 @@
-diff --git a/__pycache__/barlow_utils.cpython-37.pyc b/__pycache__/barlow_utils.cpython-37.pyc
-index 3c0d4fe..b13b62f 100644
-Binary files a/__pycache__/barlow_utils.cpython-37.pyc and b/__pycache__/barlow_utils.cpython-37.pyc differ
-diff --git a/__pycache__/models.cpython-37.pyc b/__pycache__/models.cpython-37.pyc
-index 3bbb9de..acc1737 100644
-Binary files a/__pycache__/models.cpython-37.pyc and b/__pycache__/models.cpython-37.pyc differ
-diff --git a/__pycache__/t_dataset.cpython-37.pyc b/__pycache__/t_dataset.cpython-37.pyc
-index 2650733..c4b566b 100644
-Binary files a/__pycache__/t_dataset.cpython-37.pyc and b/__pycache__/t_dataset.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-37.pyc b/__pycache__/translation_utils.cpython-37.pyc
-index 60c9eda..12c22a5 100644
-Binary files a/__pycache__/translation_utils.cpython-37.pyc and b/__pycache__/translation_utils.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-38.pyc b/__pycache__/translation_utils.cpython-38.pyc
-index 061d0e7..a1e7877 100644
-Binary files a/__pycache__/translation_utils.cpython-38.pyc and b/__pycache__/translation_utils.cpython-38.pyc differ
-diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
-index 884dd9c..f8b257c 100644
---- a/checkpoint/stats.txt
-+++ b/checkpoint/stats.txt
-@@ -833,3 +833,198 @@ train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 -
- {"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
- {"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
- {"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 4}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 5}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 5}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 6}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 7}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 7}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 8}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 8}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 9}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 8}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 65}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 178}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 15}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 72}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 128}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 183}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 239}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 295}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 351}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 407}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 463}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 19}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 104}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 188}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 355}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 606}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 690}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.121065616607666, "time": 9}
-+{"epoch": 0, "step": 5, "loss": 97.44178771972656, "time": 10}
-+{"epoch": 0, "step": 10, "loss": 168.33328247070312, "time": 12}
-+{"epoch": 0, "step": 15, "loss": 133.17933654785156, "time": 12}
-+{"epoch": 0, "step": 20, "loss": 112.3768539428711, "time": 13}
-+{"epoch": 0, "step": 25, "loss": 120.29653930664062, "time": 14}
-+{"epoch": 0, "step": 30, "loss": 119.97941589355469, "time": 15}
-+{"epoch": 0, "step": 35, "loss": 86.40515899658203, "time": 16}
-+{"epoch": 0, "step": 40, "loss": 70.5906982421875, "time": 17}
-+train_translation.py
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 28}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 155}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 281}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 405}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 530}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 657}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 783}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 908}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 1033}
-+train_translation.py
-+train_translation.py
-+train_translation.py
-+train_translation.py --load=1
-+train_translation.py --load=1
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 9}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 65}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 178}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 9}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 66}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 179}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 16}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 72}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 128}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 184}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 240}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 296}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 352}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 408}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 464}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 20}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 273}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 356}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 440}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 524}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 608}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 692}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 20}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 356}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 607}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 691}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 20}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 188}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 356}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 607}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 690}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 21}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 273}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 357}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 440}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 524}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 608}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 691}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 21}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 106}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 273}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 357}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 441}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 524}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 608}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 691}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.128603458404541, "time": 19}
-+{"epoch": 0, "step": 5, "loss": 156.04449462890625, "time": 104}
-+{"epoch": 0, "step": 10, "loss": 154.7353515625, "time": 188}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.128603458404541, "time": 5}
-+{"epoch": 0, "step": 5, "loss": 156.04449462890625, "time": 6}
-+{"epoch": 0, "step": 10, "loss": 154.7353515625, "time": 7}
-+{"epoch": 1, "step": 15, "loss": 138.67442321777344, "time": 70}
-+{"epoch": 1, "step": 20, "loss": 75.6456298828125, "time": 70}
-+{"epoch": 2, "step": 25, "loss": 64.19247436523438, "time": 116}
-+{"epoch": 2, "step": 30, "loss": 65.62056732177734, "time": 116}
-+{"epoch": 2, "step": 35, "loss": 66.36638641357422, "time": 117}
-+{"epoch": 3, "step": 40, "loss": 77.29269409179688, "time": 164}
-+{"epoch": 3, "step": 45, "loss": 68.74011993408203, "time": 165}
-+{"epoch": 4, "step": 50, "loss": 74.82659912109375, "time": 182}
-+{"epoch": 4, "step": 55, "loss": 77.39452362060547, "time": 183}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.128603458404541, "time": 5}
-+{"epoch": 0, "step": 5, "loss": 156.04449462890625, "time": 6}
-+{"epoch": 0, "step": 10, "loss": 154.7353515625, "time": 7}
-+{"epoch": 1, "step": 15, "loss": 138.67442321777344, "time": 73}
-+{"epoch": 1, "step": 20, "loss": 75.6456298828125, "time": 74}
-+{"epoch": 2, "step": 25, "loss": 64.19247436523438, "time": 92}
-+{"epoch": 2, "step": 30, "loss": 65.62056732177734, "time": 93}
-+{"epoch": 2, "step": 35, "loss": 66.36638641357422, "time": 93}
-+{"epoch": 3, "step": 40, "loss": 77.29269409179688, "time": 110}
-+{"epoch": 3, "step": 45, "loss": 68.74011993408203, "time": 111}
-+{"epoch": 4, "step": 50, "loss": 74.82659912109375, "time": 131}
-+{"epoch": 4, "step": 55, "loss": 77.39452362060547, "time": 132}
-+{"epoch": 5, "step": 60, "loss": 62.27414321899414, "time": 149}
-+{"epoch": 5, "step": 65, "loss": 90.9207992553711, "time": 150}
-+{"epoch": 5, "step": 70, "loss": 66.96754455566406, "time": 150}
-+{"epoch": 6, "step": 75, "loss": 71.40245819091797, "time": 216}
-+{"epoch": 6, "step": 80, "loss": 63.940818786621094, "time": 217}
-+{"epoch": 7, "step": 85, "loss": 50.857147216796875, "time": 233}
-+{"epoch": 7, "step": 90, "loss": 78.37335205078125, "time": 234}
-+{"epoch": 7, "step": 95, "loss": 100.13611602783203, "time": 234}
-+{"epoch": 8, "step": 100, "loss": 80.35195922851562, "time": 252}
-+{"epoch": 8, "step": 105, "loss": 86.00081634521484, "time": 253}
-+{"epoch": 9, "step": 110, "loss": 82.35330200195312, "time": 272}
-+{"epoch": 9, "step": 115, "loss": 88.81517791748047, "time": 273}
-diff --git a/t_dataset.py b/t_dataset.py
-index c7ab181..53d5caa 100644
---- a/t_dataset.py
-+++ b/t_dataset.py
-@@ -20,19 +20,19 @@ class Translation_dataset_t(Dataset):
-             split = "train" 
-         else: 
-             split = "test" 
--        self.dataset = load_dataset('wmt14', "de-en", split=split) 
-+        self.dataset = load_dataset('opus_rf', "de-en", split=split) 
-         self.de_list = []
-         self.en_list = []
- #        self.tokenizer = tokenizer
-         self.tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-uncased')
--        dataset = load_dataset('opus_rf', 'de-en', split='train')
-         en_list_2 = []
--        for n, i in enumerate(dataset): 
-+        for n, i in enumerate(self.dataset): 
-             en_list_2.append(i['translation']['en'].lower())
- 
-         a1 = list(self.tokenizer(en_list_2, padding=True, return_tensors='pt')['input_ids'])
-         self.en_vocab, self.en_vocab_size = vocab(a1)
-         self.bert2id_dict = translation_utils.bert2id(self.en_vocab)
-+        self.id2bert_dict = translation_utils.id2bert(self.en_vocab)
-         
-         for i in self.dataset: 
-             self.de_list.append(self.tokenizer(i['translation']['de'].lower(), 
-diff --git a/train_translation.py b/train_translation.py
-index eea074a..245e045 100644
---- a/train_translation.py
-+++ b/train_translation.py
-@@ -33,6 +33,7 @@ import wandb
- #import barlow
- os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
- os.environ['WANDB_START_METHOD'] = 'thread'
-+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
- 
- MANUAL_SEED = 4444
- 
-@@ -47,9 +48,9 @@ parser = argparse.ArgumentParser(description = 'Translation')
- # Training hyper-parameters: 
- parser.add_argument('--workers', default=4, type=int, metavar='N', 
-                     help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
-+parser.add_argument('--epochs', default=10, type=int, metavar='N',
-                     help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
-+parser.add_argument('--batch_size', default=16, type=int, metavar='n',
-                     help='mini-batch size')
- parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
-                     help='base learning rate')
-@@ -75,9 +76,9 @@ parser.add_argument('--dmodel', default=768, type=int, metavar='T',
-                     help='dimension of transformer encoder')
- parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                     help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=500, type=int, metavar='F', 
-+parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                     help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=8, type=int, metavar= 'N', 
-+parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                    help='number of layers of transformer encoder') 
- parser.add_argument('--projector', default='768-256', type=str,
-                     metavar='MLP', help='projector MLP')
-@@ -233,6 +234,7 @@ def main_worker(gpu, args):
- 
-     assert args.batch_size % args.world_size == 0
-     per_device_batch_size = args.batch_size // args.world_size
-+    id2bert_dict = dataset.id2bert_dict
-     ###############################
-     loader = torch.utils.data.DataLoader(
-          dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-@@ -267,7 +269,7 @@ def main_worker(gpu, args):
-                 optimizer.step()
-                 # losses += loss.item()
-                 
--                # wandb.log({'iter_loss': loss})
-+#                wandb.log({'iter_loss': loss})
-                 epoch_loss += loss.item()
-                 t += 1 
-                 torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-@@ -293,7 +295,7 @@ def main_worker(gpu, args):
-             if args.rank == 0: 
-                 if epoch%args.checkbleu ==0 : 
- 
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
-+                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                     wandb.log({'bleu_score': bleu_score}) 
-     #            print(bleu_score(predicted, target))
-     ##############################################################
-@@ -311,13 +313,13 @@ def main_worker(gpu, args):
-             
-     else: 
- 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
-+        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-         print('test_bleu_score', bleu_score)
-         if args.rank == 0: 
-             wandb.log({'bleu_score': bleu_score})
- 
- 
--def checkbleu(model, tokenizer, test_loader, gpu): 
-+def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
- 
-     model.eval()
-     predicted=[]
-@@ -325,19 +327,26 @@ def checkbleu(model, tokenizer, test_loader, gpu):
-             
-     for i in test_loader: 
-         src = i[0].cuda(gpu, non_blocking=True)
-+#        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-         tgt_out = i[3].cuda(gpu, non_blocking=True)
-         num_tokens = src.shape[0]
- 
-         src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
-+        out = translate(model, src, tokenizer, src_mask, id2bert_dict, gpu)
-         predicted.append(out)
-+        for i in range(len(tgt_out)): 
-+            tgt_out[i] = id2bert_dict[tgt_out[i].item()]
-         target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-+        print('out', out)
-+        print('predicted', tgt_out)
-+
-                 
-         try: 
-             bleu_score(predicted, target)
-         except: 
-             predicted.pop()
-             target.pop()
-+        
-             
-         bleu = bleu_score(predicted, target)
- 
-@@ -375,7 +384,7 @@ def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
- # actual function to translate input sentence into target language
- def translate(model: torch.nn.Module, 
-         src: torch.tensor, 
--        tokenizer,src_mask, gpu):
-+        tokenizer,src_mask, id2bert_dict, gpu):
-     model.eval()
-     
-     num_tokens = src.shape[0]
-@@ -383,6 +392,11 @@ def translate(model: torch.nn.Module,
-     
-     tgt_tokens = greedy_decode(
-         model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-+    
-+    for i in range(len(tgt_tokens)): 
-+        tgt_tokens[i] = id2bert_dict[tgt_tokens[i].item()]
-+#    print(tgt_tokens)
-+
-     return tokenizer.convert_ids_to_tokens(tgt_tokens) 
- 
- 
-diff --git a/translation_dataset.py b/translation_dataset.py
-index 274c2f3..82270c6 100644
---- a/translation_dataset.py
-+++ b/translation_dataset.py
-@@ -11,7 +11,7 @@ class Translation_dataset(Dataset):
-     
-     def __init__(self):
-       
--        self.dataset = load_dataset('wmt14', "de-en", split="train") 
-+        self.dataset = load_dataset('opus_rf', "de-en", split="train") 
-         self.de_list = []
-         self.en_list = []
- 
-diff --git a/translation_utils.py b/translation_utils.py
-index 6c66f53..4b3b830 100644
---- a/translation_utils.py
-+++ b/translation_utils.py
-@@ -31,6 +31,13 @@ def bert2id(de_list: set):
-     
-     return label_dict
- 
-+def id2bert(de_list: set): 
-+    label_dict = {}
-+    for n, i in enumerate(de_list): 
-+        label_dict[n] = i
-+    
-+    return label_dict
-+
- def generate_square_subsequent_mask(sz):
-     mask = (torch.triu(torch.ones((sz, sz))) == 1).transpose(0, 1)
-     mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
-@@ -81,10 +88,10 @@ class TokenEmbedding(nn.Module):
-         super(TokenEmbedding, self).__init__()
-         # self.embedding = nn.Embedding(vocab_size, emb_size)
-         self.embedding = mbert
--#         for param in self.embedding.parameters():
--#             param.requires_grad = False
--#         for param in self.embedding.pooler.parameters():
--#             param.requires_grad = True
-+        for param in self.embedding.parameters():
-+            param.requires_grad = False
-+        for param in self.embedding.pooler.parameters():
-+            param.requires_grad = True
-         self.emb_size = emb_size
- 
-     def forward(self, tokens: torch.tensor):
-diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
-index 6163657..267a045 120000
---- a/wandb/debug-internal.log
-+++ b/wandb/debug-internal.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug-internal.log
-\ No newline at end of file
-+run-20220416_014133-qw6te5do/logs/debug-internal.log
-\ No newline at end of file
-diff --git a/wandb/debug.log b/wandb/debug.log
-index 7d0f5dd..2534ff1 120000
---- a/wandb/debug.log
-+++ b/wandb/debug.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug.log
-\ No newline at end of file
-+run-20220416_014133-qw6te5do/logs/debug.log
-\ No newline at end of file
-diff --git a/wandb/latest-run b/wandb/latest-run
-index f11d588..659d09a 120000
---- a/wandb/latest-run
-+++ b/wandb/latest-run
-@@ -1 +1 @@
--run-20220409_182749-paufev36
-\ No newline at end of file
-+run-20220416_014133-qw6te5do
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py b/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-deleted file mode 100644
-index 9236ace..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-+++ /dev/null
-@@ -1,350 +0,0 @@
--# Copyright (c) Facebook, Inc. and its affiliates.
--# All rights reserved.
--#
--# This source code is licensed under the license found in the
--# LICENSE file in the root directory of this source tree.
--
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--from translation_dataset import Translation_dataset
--from translation_dataset import MyCollate
--from transformers import BertModel
--from transformers import AutoTokenizer
--from torch import nn, optim
--import torch
--from t_dataset import Translation_dataset_t
--from torch.nn import Transformer
--from models import BarlowTwins
--from models import Translator
--from barlow_utils import off_diagonal 
--import wandb 
--#from _config import Config 
--#config = Config.config
--
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--#setting random seeds
--SEED = 4444
--
--random.seed(SEED)
--np.random.seed(SEED)
--torch.manual_seed(SEED)
--torch.cuda.manual_seed(SEED)
--torch.backends.cudnn.deterministic = True
--
--
--
--
--parser = argparse.ArgumentParser(description='Barlow Twins Training')
--# parser.add_batch_sizeargument('data', type=Path, metavar='DIR',
--#                     help='path to dataset')
--
--
--
--# Training parameters: 
--parser.add_argument('--workers', default=20, type=int, metavar='N',
--                    help='number of data loader workers')
--parser.add_argument('--epochs', default=2, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=64, type=int, metavar='N',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate-weights', default=0.2, type=float, metavar='LR',
--                    help='base learning rate for weights')
--parser.add_argument('--learning-rate-biases', default=0.0048, type=float, metavar='LR',
--                 help='base learning rate for biases and batch norm parameters')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--lambd', default=0.0051, type=float, metavar='L',
--                    help='weight on off-diagonal terms')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--
--# Model parameters:
--parser.add_argument('--projector', default='768-768', type=str,
--                    metavar='MLP', help='projector MLP')
--parser.add_argument('--print-freq', default=100, type=int, metavar='N',
--                    help='print frequency')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=3, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--dropout', default=0.0051, type=float, metavar= 'D', 
--                   help='dropout in transformer') 
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-cased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint-dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--parser.add_argument('--load', default=1, type=int,
--                    metavar='LO', help='load weights from translation model')
--
--args = parser.parse_args()
--
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main():
--
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--        wandb.init(config=args)#############################################
--        # wandb.config.update(args)
--        config = wandb.config
--        # print(args.lambd, config.lambd)
--        # wandb.finish()
--        # exibatch_sizet()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=False)
--    t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    mbert = BertModel.from_pretrained(args.tokenizer)
--    model = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=args.lambd).cuda(gpu)
--    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--    optimizer = LARS(parameters, lr=0, weight_decay=args.weight_decay,
--                     weight_decay_filter=True,
--                     lars_adaptation_filter=True)
--    # optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
--
--    # automatically resume from checkpoint if it exists
--    # if (args.checkpoint_dir / 'checkpoint.pth').is_file():
--    #     ckpt = torch.load(args.checkpoint_dir / 'checkpoint.pth',
--    #                       map_location='cpu')
--    #     start_epoch = ckpt['epoch']
--    #     # print("model=",model)
--    #     # print("ckpt=",ckpt['model'])
--    #     model.load_state_dict(ckpt['model'])
--    #     optimizer.load_state_dict(ckpt['optimizer'])
--    # else:
--
--    trans_dataset = Translation_dataset_t(train=True)
--    src_vocab_size = trans_dataset.de_vocab_size 
--    tgt_vocab_size = trans_dataset.en_vocab_size
--    tokenizer = trans_dataset.tokenizer
--    transformer = Transformer(d_model=args.dmodel, 
--                                   nhead=args.nhead, 
--                                   num_encoder_layers=args.nlayers,
--                                   num_decoder_layers=args.nlayers, 
--                                   dim_feedforward=args.dfeedforward, 
--                                   dropout=args.dropout)
--    print(args.batch_size)
--    translation_model = Translator(mbert, 
--            transformer,
--            tgt_vocab_size=tgt_vocab_size,
--            emb_size=args.mbert_out_size)
--    
--    if args.load == 1 : 
--        print('loading translation model')
--        ckpt = torch.load(args.checkpoint_dir / 'translation_checkpoint.pth') #,map_location='cpu')
--        translation_model.load_state_dict(ckpt['model'])
--        model.transformer_enc = translation_model.transformer.encoder
--        model.mbert = translation_model.tok_emb.embedding
--        
--    start_epoch = 0
--
--
--    ################################
--    # dataset = torchvision.datasets.ImageFolder(args.data / 'train', Transform())
--    # sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--    ###############################
--
--    dataset = Translation_dataset()
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    #############################
--    start_time = time.time()
--    scaler = torch.cuda.amp.GradScaler()
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            y1 = sent[0].cuda(gpu, non_blocking=True)
--            y2 = sent[1].cuda(gpu, non_blocking=True)
--            adjust_learning_rate(args, optimizer, loader, step)
--            optimizer.zero_grad()
--            with torch.cuda.amp.autocast(): 
--                _, loss = model.forward(y1, y2)
--                wandb.log({'iter_loss':loss})
--#               print(loss.item())
--                epoch_loss += loss.item()
--            scaler.scale(loss).backward()
--            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
--            scaler.step(optimizer)
--            scaler.update()
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 lr_weights=optimizer.param_groups[0]['lr'],
--                                 lr_biases=optimizer.param_groups[1]['lr'],
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.state_dict(),
--                         optimizer=optimizer.state_dict())
--            torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--            print('barlow model saved in', args.checkpoint_dir)
--            for sent in test_loader: 
--                y1 = sent[0].cuda(gpu, non_blocking=True)
--                y2 = sent[1].cuda(gpu, non_blocking=True)
--                model.eval()
--                c, _ = model(y1, y2)
--                xlabels = tokenizer.convert_ids_to_tokens(y2)
--                ylabels = tokenizer.convert_ids_to_tokens(y1)
--    wandb.finish()
--#    if args.rank == 0:
--#        save final model
--#        torch.save(model.module.state_dict(),
--#                    args.checkpoint_dir / 'translation.pth')
--
--
--def adjust_learning_rate(args, optimizer, loader, step):
--    max_steps = args.epochs * len(loader)
--    warmup_steps = 10 * len(loader)
--    base_lr = args.batch_size / 256
--    if step < warmup_steps:
--        lr = base_lr * step / warmup_steps
--    else:
--        step -= warmup_steps
--        max_steps -= warmup_steps
--        q = 0.5 * (1 + math.cos(math.pi * step / max_steps))
--        end_lr = base_lr * 0.001
--        lr = base_lr * q + end_lr * (1 - q)
--    optimizer.param_groups[0]['lr'] = lr * args.learning_rate_weights
--    optimizer.param_groups[1]['lr'] = lr * args.learning_rate_biases
--
--
--def handle_sigusr1(signum, frame):
--    os.system(f'scontrol requeue {os.getenv("SLURM_JOB_ID")}')
--    exit()
--
--
--def handle_sigterm(signum, frame):
--    pass
--
--
--class LARS(optim.Optimizer):
--    def __init__(self, params, lr, weight_decay=0, momentum=0.9, eta=0.001,
--                 weight_decay_filter=False, lars_adaptation_filter=False):
--        defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum,
--                        eta=eta, weight_decay_filter=weight_decay_filter,
--                        lars_adaptation_filter=lars_adaptation_filter)
--        super().__init__(params, defaults)
--
--
--    def exclude_bias_and_norm(self, p):
--        return p.ndim == 1
--
--    @torch.no_grad()
--    def step(self):
--        for g in self.param_groups:
--            for p in g['params']:
--                dp = p.grad
--
--                if dp is None:
--                    continue
--
--                if not g['weight_decay_filter'] or not self.exclude_bias_and_norm(p):
--                    dp = dp.add(p, alpha=g['weight_decay'])
--
--                if not g['lars_adaptation_filter'] or not self.exclude_bias_and_norm(p):
--                    param_norm = torch.norm(p)
--                    update_norm = torch.norm(dp)
--                    one = torch.ones_like(param_norm)
--                    q = torch.where(param_norm > 0.,
--                                    torch.where(update_norm > 0,
--                                                (g['eta'] * param_norm / update_norm), one), one)
--                    dp = dp.mul(q)
--
--                param_state = self.state[p]
--                if 'mu' not in param_state:
--                    param_state['mu'] = torch.zeros_like(p)
--                mu = param_state['mu']
--                mu.mul_(g['momentum']).add_(dp)
--
--                p.add_(mu, alpha=-g['lr'])
--
--
--if __name__ == '__main__':
--    try:  
--      main()
--    except KeyboardInterrupt:
--      print('Interrupted')
--      wandb.finish()
--      try:
--          sys.exit(0)
--      except SystemExit:
--          os._exit(0)
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml b/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/config.yaml b/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-deleted file mode 100644
-index 147470d..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-+++ /dev/null
-@@ -1,90 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/barlow.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.0051
--epochs:
--  desc: null
--  value: 2
--lambd:
--  desc: null
--  value: 0.0051
--learning_rate_biases:
--  desc: null
--  value: 0.0048
--learning_rate_weights:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 3
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 100
--projector:
--  desc: null
--  value: 768-768
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-cased
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 20
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/output.log b/wandb/run-20220406_171518-s7zesus8/files/output.log
-deleted file mode 100644
-index 847ffbb..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/output.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--
--barlow.py --load 0
--Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Error in sys.excepthook:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 47, in getlines
--    return updatecache(filename, module_globals)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 136, in updatecache
--    with tokenize.open(fullname) as fp:
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/tokenize.py", line 447, in open
--    buffer = _builtin_open(filename, 'rb')
--KeyboardInterrupt
--Original exception was:
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt b/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-deleted file mode 100644
-index 5f93d29..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,21 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-06T11:45:20.215162",
--    "startedAt": "2022-04-06T11:45:18.613420",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_enhancement/barlow.py",
--    "codePath": "barlow.py",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log b/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-deleted file mode 100644
-index 0630656..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-+++ /dev/null
-@@ -1,91 +0,0 @@
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,622 DEBUG   MainThread:16786 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: check_version
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send():179] send: header
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: check_version
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:19,155 DEBUG   SenderThread:16786 [sender.py:send():179] send: run
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 DEBUG   SenderThread:16786 [sender.py:send():179] send: summary
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:20,211 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: run_start
--2022-04-06 17:15:20,214 DEBUG   HandlerThread:16786 [meta.py:__init__():39] meta init
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:__init__():53] meta init done
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:probe():210] probe
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():89] save code
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():110] save code done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():57] save pip
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():71] save pip done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_conda():78] save conda
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,240 DEBUG   HandlerThread:16786 [meta.py:_save_conda():86] save conda done
--2022-04-06 17:15:22,241 DEBUG   HandlerThread:16786 [meta.py:probe():252] probe done
--2022-04-06 17:15:22,255 DEBUG   SenderThread:16786 [sender.py:send():179] send: files
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-06 17:15:22,262 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: stop_status
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug.log b/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-deleted file mode 100644
-index 9769176..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:init():369] calling init triggers
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 20, 'epochs': 2, 'batch_size': 64, 'learning_rate_weights': 0.2, 'learning_rate_biases': 0.0048, 'weight_decay': 1e-06, 'lambd': 0.0051, 'clip': 1, 'projector': '768-768', 'print_freq': 100, 'dmodel': 768, 'nhead': 3, 'dfeedforward': 256, 'nlayers': 3, 'dropout': 0.0051, 'tokenizer': 'bert-base-multilingual-cased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():418] starting backend
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():132] starting backend process...
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb b/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
-deleted file mode 100644
-index cd7ebea..0000000
-Binary files a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb and /dev/null differ
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py b/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-deleted file mode 100644
-index f15df21..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch b/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-deleted file mode 100644
-index 0ddeae0..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-+++ /dev/null
-@@ -1,226 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2158287 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,87 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..ee4c0ff 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..29be718 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..bda663d 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/output.log b/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-deleted file mode 100644
-index 4d74c7d..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt b/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-deleted file mode 100644
-index 9eb0f02..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:28:48.101605",
--    "startedAt": "2022-04-08T09:28:45.736549",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-deleted file mode 100644
-index 5708b15..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.139744758605957, "_runtime": 22, "_timestamp": 1649410147, "_step": 1, "epoch_loss": 7.139744758605957}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-deleted file mode 100644
-index e57e276..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,745 DEBUG   MainThread:63630 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send():179] send: header
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:46,531 DEBUG   SenderThread:63630 [sender.py:send():179] send: run
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:48,099 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():39] meta init
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():53] meta init done
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:probe():210] probe
--2022-04-08 14:58:48,107 DEBUG   HandlerThread:63630 [meta.py:_setup_git():200] setup git
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_save_code():89] save code
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_code():110] save code done
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_patches():127] save patches
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():57] save pip
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_conda():78] save conda
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:probe():252] probe done
--2022-04-08 14:58:49,727 DEBUG   SenderThread:63630 [sender.py:send():179] send: files
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,737 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:50,547 DEBUG   SenderThread:63630 [sender.py:send():179] send: config
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:05,549 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:05,549 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-deleted file mode 100644
-index a6875c4..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'd3rkwo1k', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml', 'start_method': 'thread'}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:45,738 INFO    MainThread:63630 [wandb_init.py:init():418] starting backend
--2022-04-08 14:58:45,743 INFO    MainThread:63630 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb b/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py b/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml b/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/config.yaml b/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-deleted file mode 100644
-index d5b49b7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 36
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/diff.patch b/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-deleted file mode 100644
-index 5bddede..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-+++ /dev/null
-@@ -1,228 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..f7a973d 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,89 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..151b958 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..80b3468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..abf5aa3 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145917-fjhaj183
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/output.log b/wandb/run-20220408_145917-fjhaj183/files/output.log
-deleted file mode 100644
-index ceeeb4b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt b/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-deleted file mode 100644
-index 705a1e7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:18.659644",
--    "startedAt": "2022-04-08T09:29:17.328450",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=36",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-deleted file mode 100644
-index 1749cae..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140841484069824, "_runtime": 16, "_timestamp": 1649410173, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log b/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-deleted file mode 100644
-index 6a2ea0b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,338 DEBUG   MainThread:63880 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send():179] send: header
--2022-04-08 14:59:17,342 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:17,943 DEBUG   SenderThread:63880 [sender.py:send():179] send: run
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:18,657 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():39] meta init
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:probe():210] probe
--2022-04-08 14:59:18,665 DEBUG   HandlerThread:63880 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_save_code():89] save code
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:probe():252] probe done
--2022-04-08 14:59:20,075 DEBUG   SenderThread:63880 [sender.py:send():179] send: files
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,086 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:20,978 DEBUG   SenderThread:63880 [sender.py:send():179] send: config
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: history
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug.log b/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-deleted file mode 100644
-index 5f71fa1..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjhaj183', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjhaj183.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 36, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:17,335 INFO    MainThread:63880 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb b/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py b/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml b/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml b/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-deleted file mode 100644
-index 39ea9ed..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 16
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch b/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-deleted file mode 100644
-index 3de404c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-+++ /dev/null
-@@ -1,230 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..1036f20 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,91 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..33a9122 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..622b540 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c775116 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/output.log b/wandb/run-20220408_145943-fjlzyv53/files/output.log
-deleted file mode 100644
-index 0a584f7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt b/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-deleted file mode 100644
-index 321b5fe..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:44.714511",
--    "startedAt": "2022-04-08T09:29:43.530748",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=16",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-deleted file mode 100644
-index 43fa534..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.180241584777832, "_runtime": 16, "_timestamp": 1649410199, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-deleted file mode 100644
-index 1bb5ef6..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,540 DEBUG   MainThread:64131 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send():179] send: header
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:43,999 DEBUG   SenderThread:64131 [sender.py:send():179] send: run
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:44,712 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():39] meta init
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:probe():210] probe
--2022-04-08 14:59:44,720 DEBUG   HandlerThread:64131 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:44,739 DEBUG   HandlerThread:64131 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:44,740 DEBUG   HandlerThread:64131 [meta.py:_save_code():89] save code
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:probe():252] probe done
--2022-04-08 14:59:46,122 DEBUG   SenderThread:64131 [sender.py:send():179] send: files
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,133 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,710 DEBUG   SenderThread:64131 [sender.py:send():179] send: config
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: history
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-deleted file mode 100644
-index 042323c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjlzyv53', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 1024, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:43,537 INFO    MainThread:64131 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb b/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py b/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml b/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150006-abict4v2/files/config.yaml b/wandb/run-20220408_150006-abict4v2/files/config.yaml
-deleted file mode 100644
-index 55505a9..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 20
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 8
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150006-abict4v2/files/diff.patch b/wandb/run-20220408_150006-abict4v2/files/diff.patch
-deleted file mode 100644
-index cae01c4..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/diff.patch
-+++ /dev/null
-@@ -1,232 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..a79a795 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,93 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..baa82b6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..79d1f8d 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..4572147 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150006-abict4v2
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/output.log b/wandb/run-20220408_150006-abict4v2/files/output.log
-deleted file mode 100644
-index 18438a2..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/output.log
-+++ /dev/null
-@@ -1,14 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:261: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
-diff --git a/wandb/run-20220408_150006-abict4v2/files/requirements.txt b/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json b/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-deleted file mode 100644
-index f46fef8..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:08.569102",
--    "startedAt": "2022-04-08T09:30:06.988517",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=20",
--        "--nhead=8",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json b/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-deleted file mode 100644
-index 4c47552..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.120020389556885, "_runtime": 21, "_timestamp": 1649410227, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log b/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-deleted file mode 100644
-index eb4114e..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-+++ /dev/null
-@@ -1,71 +0,0 @@
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,998 DEBUG   MainThread:64393 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send():179] send: header
--2022-04-08 15:00:07,002 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:07,447 DEBUG   SenderThread:64393 [sender.py:send():179] send: run
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,565 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:08,566 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:08,568 DEBUG   HandlerThread:64393 [meta.py:__init__():39] meta init
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:probe():210] probe
--2022-04-08 15:00:08,574 DEBUG   HandlerThread:64393 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_save_code():89] save code
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:probe():252] probe done
--2022-04-08 15:00:10,005 DEBUG   SenderThread:64393 [sender.py:send():179] send: files
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:11,189 DEBUG   SenderThread:64393 [sender.py:send():179] send: config
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:26,191 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:26,191 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: history
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug.log b/wandb/run-20220408_150006-abict4v2/logs/debug.log
-deleted file mode 100644
-index 2782e5f..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug.log
-+++ /dev/null
-@@ -1,51 +0,0 @@
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'abict4v2', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-abict4v2.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 20, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 8, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:06,990 INFO    MainThread:64393 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:06,995 INFO    MainThread:64393 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb b/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py b/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml b/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml b/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-deleted file mode 100644
-index ea14f0e..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch b/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-deleted file mode 100644
-index 47b804f..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-+++ /dev/null
-@@ -1,234 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2248477 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,95 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..165ed2c 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..f1325dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..1413293 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/output.log b/wandb/run-20220408_150037-ba0yl54z/files/output.log
-deleted file mode 100644
-index 6742216..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt b/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-deleted file mode 100644
-index 5a492ae..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:38.254663",
--    "startedAt": "2022-04-08T09:30:37.394479",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=64",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-deleted file mode 100644
-index 662ac89..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.082856178283691, "_runtime": 16, "_timestamp": 1649410253, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-deleted file mode 100644
-index 0c041a1..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,404 DEBUG   MainThread:64646 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 DEBUG   SenderThread:64646 [sender.py:send():179] send: header
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,410 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:37,410 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:37,611 DEBUG   SenderThread:64646 [sender.py:send():179] send: run
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:38,252 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():39] meta init
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:probe():210] probe
--2022-04-08 15:00:38,260 DEBUG   HandlerThread:64646 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_save_code():89] save code
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:probe():252] probe done
--2022-04-08 15:00:39,665 DEBUG   SenderThread:64646 [sender.py:send():179] send: files
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,676 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:39,676 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:40,430 DEBUG   SenderThread:64646 [sender.py:send():179] send: config
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: history
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-deleted file mode 100644
-index 4346748..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'ba0yl54z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 64, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 512, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:37,401 INFO    MainThread:64646 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb b/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py b/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml b/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml b/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-deleted file mode 100644
-index 546bdaa..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 16
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch b/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-deleted file mode 100644
-index c98ba4e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-+++ /dev/null
-@@ -1,285 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ea51a40 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,97 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f8e98b2 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..9304e2b 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b02872b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/output.log b/wandb/run-20220408_153004-dg43ixc4/files/output.log
-deleted file mode 100644
-index f49019d..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt b/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-deleted file mode 100644
-index 109e1b6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:05.796412",
--    "startedAt": "2022-04-08T10:00:04.837672",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=512",
--        "--epochs=16",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-deleted file mode 100644
-index 09cdda6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140233993530273, "_runtime": 15, "_timestamp": 1649412019, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-deleted file mode 100644
-index 9669aaf..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,848 DEBUG   MainThread:65348 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,851 DEBUG   SenderThread:65348 [sender.py:send():179] send: header
--2022-04-08 15:30:04,851 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:04,852 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,022 DEBUG   SenderThread:65348 [sender.py:send():179] send: run
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:05,794 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():39] meta init
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:probe():210] probe
--2022-04-08 15:30:05,802 DEBUG   HandlerThread:65348 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:05,821 DEBUG   HandlerThread:65348 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:05,822 DEBUG   HandlerThread:65348 [meta.py:_save_code():89] save code
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:probe():252] probe done
--2022-04-08 15:30:07,221 DEBUG   SenderThread:65348 [sender.py:send():179] send: files
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,232 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:07,233 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,677 DEBUG   SenderThread:65348 [sender.py:send():179] send: config
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: history
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-deleted file mode 100644
-index 66c14b1..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'dg43ixc4', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 16, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:04,845 INFO    MainThread:65348 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb b/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py b/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml b/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml b/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-deleted file mode 100644
-index 122f33a..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch b/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-deleted file mode 100644
-index 797f0a1..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-+++ /dev/null
-@@ -1,287 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..356076f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,99 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7b452fc 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..48b2ecd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..93be230 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/output.log b/wandb/run-20220408_153027-fwwd5rya/files/output.log
-deleted file mode 100644
-index e86aeca..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-17:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt b/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-deleted file mode 100644
-index dcac75d..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:27.794832",
--    "startedAt": "2022-04-08T10:00:27.031889",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=256",
--        "--epochs=40",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-deleted file mode 100644
-index e70a2b8..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-+++ /dev/null
-@@ -1,99 +0,0 @@
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,040 DEBUG   MainThread:65601 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,046 DEBUG   SenderThread:65601 [sender.py:send():179] send: header
--2022-04-08 15:30:27,046 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:27,047 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,254 DEBUG   SenderThread:65601 [sender.py:send():179] send: run
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: summary
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:27,792 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():39] meta init
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:probe():210] probe
--2022-04-08 15:30:27,800 DEBUG   HandlerThread:65601 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:27,819 DEBUG   HandlerThread:65601 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:27,820 DEBUG   HandlerThread:65601 [meta.py:_save_code():89] save code
--2022-04-08 15:30:27,828 DEBUG   HandlerThread:65601 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:27,829 DEBUG   HandlerThread:65601 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:probe():252] probe done
--2022-04-08 15:30:29,202 DEBUG   SenderThread:65601 [sender.py:send():179] send: files
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:29,214 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: config
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-deleted file mode 100644
-index 987c5d6..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-+++ /dev/null
-@@ -1,84 +0,0 @@
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fwwd5rya', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 256, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:27,038 INFO    MainThread:65601 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:27,039 INFO    MainThread:65601 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb b/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
-deleted file mode 100644
-index bfb12ff..0000000
-Binary files a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py b/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml b/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml b/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch b/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-deleted file mode 100644
-index bd71761..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-+++ /dev/null
-@@ -1,377 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..d3a775c 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,100 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..74ec524 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..c957937 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..287708f 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/output.log b/wandb/run-20220409_152616-3a3gw94y/files/output.log
-deleted file mode 100644
-index 13e9c3e..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt b/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-deleted file mode 100644
-index 20f0482..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:56:17.429229",
--    "startedAt": "2022-04-09T09:56:16.815816",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-deleted file mode 100644
-index 5602f92..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 16, "_timestamp": 1649498192, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-deleted file mode 100644
-index 2546fd3..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,824 DEBUG   MainThread:3266 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,828 DEBUG   SenderThread:3266 [sender.py:send():179] send: header
--2022-04-09 15:26:16,829 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:16,984 DEBUG   SenderThread:3266 [sender.py:send():179] send: run
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:17,426 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():39] meta init
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():53] meta init done
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:probe():210] probe
--2022-04-09 15:26:17,435 DEBUG   HandlerThread:3266 [meta.py:_setup_git():200] setup git
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_save_code():89] save code
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_code():110] save code done
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_patches():127] save patches
--2022-04-09 15:26:17,564 DEBUG   HandlerThread:3266 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:26:17,565 DEBUG   HandlerThread:3266 [meta.py:_save_pip():57] save pip
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_conda():78] save conda
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:probe():252] probe done
--2022-04-09 15:26:19,491 DEBUG   SenderThread:3266 [sender.py:send():179] send: files
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:26:19,497 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:19,831 DEBUG   SenderThread:3266 [sender.py:send():179] send: config
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: history
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-deleted file mode 100644
-index ebbf034..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():418] starting backend
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb b/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py b/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml b/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml b/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch b/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-deleted file mode 100644
-index c3ed101..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-+++ /dev/null
-@@ -1,379 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ed88fe4 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,102 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..4895794 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..1f9d48c 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..dfe2dcb 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/output.log b/wandb/run-20220409_152708-15jgzcwp/files/output.log
-deleted file mode 100644
-index 9a9a49f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt b/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-deleted file mode 100644
-index abaad7d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:57:09.613679",
--    "startedAt": "2022-04-09T09:57:08.966939",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-deleted file mode 100644
-index 0164a0d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 12, "_timestamp": 1649498241, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-deleted file mode 100644
-index de7918e..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,002 DEBUG   MainThread:3540 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,017 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send():179] send: header
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,109 DEBUG   SenderThread:3540 [sender.py:send():179] send: run
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:09,611 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():39] meta init
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():53] meta init done
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:probe():210] probe
--2022-04-09 15:27:09,619 DEBUG   HandlerThread:3540 [meta.py:_setup_git():200] setup git
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_save_code():89] save code
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_code():110] save code done
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_patches():127] save patches
--2022-04-09 15:27:09,693 DEBUG   HandlerThread:3540 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():57] save pip
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_conda():78] save conda
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,002 DEBUG   HandlerThread:3540 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:27:11,003 DEBUG   HandlerThread:3540 [meta.py:probe():252] probe done
--2022-04-09 15:27:11,004 DEBUG   SenderThread:3540 [sender.py:send():179] send: files
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,362 DEBUG   SenderThread:3540 [sender.py:send():179] send: config
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: history
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-deleted file mode 100644
-index 023162f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:27:08,971 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:08,974 INFO    MainThread:3540 [wandb_init.py:init():418] starting backend
--2022-04-09 15:27:08,994 INFO    MainThread:3540 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:27:08,996 INFO    MainThread:3540 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb b/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py b/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-deleted file mode 100644
-index 596bd8d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch b/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-deleted file mode 100644
-index edba74d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-+++ /dev/null
-@@ -1,457 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..6f7f3e6 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,180 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..596bd8d 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7064436 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..3ee4416 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..425ec98 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/output.log b/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-deleted file mode 100644
-index e872735..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt b/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-deleted file mode 100644
-index 39bdbe7..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:31:16.739157",
--    "startedAt": "2022-04-09T10:31:15.626079",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-deleted file mode 100644
-index 96a4906..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 14, "_timestamp": 1649500289, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-deleted file mode 100644
-index 2dc7db1..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,660 DEBUG   MainThread:6109 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 DEBUG   SenderThread:6109 [sender.py:send():179] send: header
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,673 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:01:15,673 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:15,970 DEBUG   SenderThread:6109 [sender.py:send():179] send: run
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:16,736 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():39] meta init
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():53] meta init done
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:probe():210] probe
--2022-04-09 16:01:16,745 DEBUG   HandlerThread:6109 [meta.py:_setup_git():200] setup git
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_save_code():89] save code
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_code():110] save code done
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_patches():127] save patches
--2022-04-09 16:01:16,811 DEBUG   HandlerThread:6109 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():57] save pip
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_conda():78] save conda
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:probe():252] probe done
--2022-04-09 16:01:18,150 DEBUG   SenderThread:6109 [sender.py:send():179] send: files
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,158 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:01:18,158 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,709 DEBUG   SenderThread:6109 [sender.py:send():179] send: config
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: history
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-deleted file mode 100644
-index 87f5666..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--2022-04-09 16:01:15,633 INFO    MainThread:6109 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():418] starting backend
--2022-04-09 16:01:15,655 INFO    MainThread:6109 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:01:15,656 INFO    MainThread:6109 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb b/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py b/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-deleted file mode 100644
-index feaf1fc..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch b/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-deleted file mode 100644
-index eec0ab3..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-+++ /dev/null
-@@ -1,459 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..8b42533 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,182 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..feaf1fc 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..e712296 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b2fc627 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..337b531 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/output.log b/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-deleted file mode 100644
-index e15e9a4..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-+++ /dev/null
-@@ -1,17 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt b/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-deleted file mode 100644
-index f4efc7b..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:32:47.190940",
--    "startedAt": "2022-04-09T10:32:46.030719",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-deleted file mode 100644
-index 59ceedf..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 18, "_timestamp": 1649500384, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-deleted file mode 100644
-index 4dae842..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,040 DEBUG   MainThread:6410 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send():179] send: header
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:46,151 DEBUG   SenderThread:6410 [sender.py:send():179] send: run
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:47,188 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():39] meta init
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():53] meta init done
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:probe():210] probe
--2022-04-09 16:02:47,197 DEBUG   HandlerThread:6410 [meta.py:_setup_git():200] setup git
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_save_code():89] save code
--2022-04-09 16:02:47,224 DEBUG   HandlerThread:6410 [meta.py:_save_code():110] save code done
--2022-04-09 16:02:47,225 DEBUG   HandlerThread:6410 [meta.py:_save_patches():127] save patches
--2022-04-09 16:02:47,270 DEBUG   HandlerThread:6410 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():57] save pip
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_conda():78] save conda
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:probe():252] probe done
--2022-04-09 16:02:48,639 DEBUG   SenderThread:6410 [sender.py:send():179] send: files
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,649 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:49,267 DEBUG   SenderThread:6410 [sender.py:send():179] send: config
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,268 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:03:04,269 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:03:04,791 DEBUG   SenderThread:6410 [sender.py:send():179] send: history
--2022-04-09 16:03:04,792 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-deleted file mode 100644
-index c4edd31..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():418] starting backend
--2022-04-09 16:02:46,037 INFO    MainThread:6410 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb b/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py b/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-deleted file mode 100644
-index 182fd97..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-+++ /dev/null
-@@ -1,378 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch b/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-deleted file mode 100644
-index 2c51f6a..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-+++ /dev/null
-@@ -1,470 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..507a499 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,192 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..182fd97 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,98 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..2224b92 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..94d02b9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f7361e5 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/output.log b/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-deleted file mode 100644
-index 35bceac..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-+++ /dev/null
-@@ -1,18 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt b/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-deleted file mode 100644
-index 440569b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:34:10.122598",
--    "startedAt": "2022-04-09T10:34:09.149412",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-deleted file mode 100644
-index 52da06b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 27, "_timestamp": 1649500476, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-deleted file mode 100644
-index bf89eff..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,159 DEBUG   MainThread:6703 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send():179] send: header
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:09,250 DEBUG   SenderThread:6703 [sender.py:send():179] send: run
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:10,119 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():39] meta init
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():53] meta init done
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:probe():210] probe
--2022-04-09 16:04:10,130 DEBUG   HandlerThread:6703 [meta.py:_setup_git():200] setup git
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_save_code():89] save code
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_code():110] save code done
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_patches():127] save patches
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_pip():57] save pip
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_conda():78] save conda
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:probe():252] probe done
--2022-04-09 16:04:11,658 DEBUG   SenderThread:6703 [sender.py:send():179] send: files
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,667 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:11,669 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:12,396 DEBUG   SenderThread:6703 [sender.py:send():179] send: config
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:27,397 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:27,397 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: history
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:39,168 DEBUG   SenderThread:6703 [sender.py:send():179] send: stats
--2022-04-09 16:04:44,241 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:44,241 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:59,736 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:59,737 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-deleted file mode 100644
-index 0fbab81..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-+++ /dev/null
-@@ -1,54 +0,0 @@
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():418] starting backend
--2022-04-09 16:04:09,156 INFO    MainThread:6703 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:04:09,157 INFO    MainThread:6703 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb b/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
-deleted file mode 100644
-index 81c67b9..0000000
-Binary files a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py b/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml b/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/config.yaml b/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-deleted file mode 100644
-index 1ebd7db..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/diff.patch b/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-deleted file mode 100644
-index 9c4e2ae..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-+++ /dev/null
-@@ -1,482 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2d0dffc 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,202 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..18dd535 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b8703a2 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7af087b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160908-2097uoqw
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/output.log b/wandb/run-20220409_160908-2097uoqw/files/output.log
-deleted file mode 100644
-index ed7c7b5..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt b/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-deleted file mode 100644
-index 3cf53b0..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:39:09.049034",
--    "startedAt": "2022-04-09T10:39:08.174640",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-deleted file mode 100644
-index 225791e..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5264.9873046875, "_runtime": 162, "_timestamp": 1649500910, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log b/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-deleted file mode 100644
-index 1baf812..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-+++ /dev/null
-@@ -1,1238 +0,0 @@
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,183 DEBUG   MainThread:7244 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 DEBUG   SenderThread:7244 [sender.py:send():179] send: header
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,187 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:09:08,187 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:08,556 DEBUG   SenderThread:7244 [sender.py:send():179] send: run
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:09,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():39] meta init
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():53] meta init done
--2022-04-09 16:09:09,049 DEBUG   HandlerThread:7244 [meta.py:probe():210] probe
--2022-04-09 16:09:09,055 DEBUG   HandlerThread:7244 [meta.py:_setup_git():200] setup git
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_save_code():89] save code
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_code():110] save code done
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_patches():127] save patches
--2022-04-09 16:09:09,148 DEBUG   HandlerThread:7244 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:09:09,149 DEBUG   HandlerThread:7244 [meta.py:_save_pip():57] save pip
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_conda():78] save conda
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:probe():252] probe done
--2022-04-09 16:09:10,559 DEBUG   SenderThread:7244 [sender.py:send():179] send: files
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,033 DEBUG   SenderThread:7244 [sender.py:send():179] send: config
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:26,037 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:26,037 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:37,780 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:41,491 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:41,492 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:08,466 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:10:12,367 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:12,368 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:27,818 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:27,818 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:43,478 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:43,478 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,373 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:05,374 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:08,654 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:14,750 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:14,750 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:32,169 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:32,169 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:39,457 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:48,462 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:48,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:03,967 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:12:03,968 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
--2022-04-09 16:12:05,938 INFO    MainThread:7244 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 16:12:05,939 INFO    MainThread:7244 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:12:06,150 DEBUG   SenderThread:7244 [sender.py:send():179] send: telemetry
--2022-04-09 16:12:06,151 DEBUG   SenderThread:7244 [sender.py:send():179] send: exit
--2022-04-09 16:12:06,151 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():295] send defer
--2022-04-09 16:12:06,153 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:06,155 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,155 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 16:12:06,155 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:06,156 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 16:12:06,158 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,158 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 16:12:06,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:12:06,227 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,227 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 16:12:06,228 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,228 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 16:12:06,229 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,229 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 16:12:06,229 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,229 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 16:12:06,259 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,450 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:06,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:07,230 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 16:12:07,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,231 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,231 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 16:12:07,231 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:07,232 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:12:07,333 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:07,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:07,453 INFO    SenderThread:7244 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt requirements.txt
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:12:07,455 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log output.log
--2022-04-09 16:12:07,456 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:12:07,457 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:12:07,467 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml config.yaml
--2022-04-09 16:12:07,468 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch diff.patch
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 16:12:07,508 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,510 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,510 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 16:12:07,510 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:07,511 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 16:12:07,512 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,512 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 16:12:07,512 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,513 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 16:12:07,612 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,484 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 16:12:08,485 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,486 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,486 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 16:12:08,487 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 16:12:08,487 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41552
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,489 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,489 DEBUG   SenderThread:7244 [sender.py:send():179] send: final
--2022-04-09 16:12:08,490 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send():179] send: footer
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,490 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 16:12:08,591 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,591 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,593 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,695 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,695 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,696 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,798 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,798 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,799 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,848 INFO    Thread-33 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:08,900 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,901 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,902 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,004 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,005 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,006 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,108 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,109 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,110 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,212 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,213 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,214 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,316 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,317 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,318 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,420 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,421 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,422 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,524 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,525 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,526 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,628 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,629 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,630 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,732 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,733 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,734 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,837 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,838 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,840 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,875 INFO    Thread-32 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:09,942 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,942 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,944 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,046 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,047 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,149 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,150 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,151 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,253 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,254 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,255 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,304 INFO    Thread-29 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:12:10,357 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,358 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,359 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,461 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,463 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,772 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,772 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,772 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,874 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,874 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,876 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,978 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,979 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,980 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,082 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,082 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,084 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,186 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,186 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,188 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,290 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,290 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,292 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,314 INFO    Thread-30 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:11,394 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,394 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,396 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,498 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,499 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,500 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,602 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,603 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,604 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,706 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,707 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,708 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,810 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,810 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,812 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,914 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,915 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,916 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,018 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,019 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,020 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,122 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,122 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,124 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,226 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,228 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,330 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,330 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,332 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,434 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,435 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,436 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,538 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,538 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,540 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,642 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,642 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,644 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,746 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,746 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,747 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,850 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,850 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,852 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,954 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,954 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,955 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,057 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,058 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,059 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,161 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,162 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,163 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,265 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,266 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,267 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,369 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,370 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,371 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,473 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,473 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,475 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,577 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,577 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,578 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,680 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,681 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,682 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,784 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,785 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,786 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,888 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,889 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,890 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,992 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,993 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,994 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,096 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,097 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,098 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,200 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,201 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,202 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,304 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,305 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,307 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,409 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,410 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,411 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,513 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,514 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,515 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,617 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,618 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,619 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,721 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,721 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,723 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,826 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,827 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,829 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,931 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,931 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,933 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,034 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,035 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,037 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,138 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,139 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,141 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,244 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,244 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,245 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,348 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,348 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,350 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,453 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,454 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,461 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,773 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,773 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,775 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,877 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,877 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,879 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,981 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,982 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,983 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,085 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,086 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,087 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,189 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,190 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,191 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,293 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,294 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,295 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,397 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,398 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,399 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,501 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,502 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,503 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,605 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,606 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,607 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,709 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,710 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,711 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,813 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,814 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,816 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,918 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,919 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,920 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,022 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,023 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,024 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,126 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,127 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,128 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,230 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,232 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,334 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,335 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,336 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,374 INFO    Thread-31 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:12:17,438 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,438 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,440 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,542 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,543 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,544 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,646 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,647 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,647 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:17,648 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,650 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 16:12:17,653 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 16:12:17,656 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 16:12:17,656 INFO    HandlerThread:7244 [handler.py:finish():638] shutting down handler
--2022-04-09 16:12:18,493 INFO    WriterThread:7244 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:12:18,647 INFO    SenderThread:7244 [sender.py:finish():933] shutting down sender
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:18,661 INFO    MainThread:7244 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 16:12:18,662 INFO    MainThread:7244 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 16:12:18,663 INFO    MainThread:7244 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 16:12:18,709 INFO    MainThread:7244 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug.log b/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-deleted file mode 100644
-index ad8f755..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-+++ /dev/null
-@@ -1,77 +0,0 @@
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug.log
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():418] starting backend
--2022-04-09 16:09:08,180 INFO    MainThread:7244 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
-diff --git a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb b/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
-deleted file mode 100644
-index b5995f1..0000000
-Binary files a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py b/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml b/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/config.yaml b/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/diff.patch b/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-deleted file mode 100644
-index aa6c773..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-+++ /dev/null
-@@ -1,528 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2aaecf9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,248 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..91bb884 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..252e468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c99b343 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_161421-3t82t88x
--\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/output.log b/wandb/run-20220409_161421-3t82t88x/files/output.log
-deleted file mode 100644
-index 3bf650b..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/output.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt b/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-deleted file mode 100644
-index f9df6f1..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:44:23.094487",
--    "startedAt": "2022-04-09T10:44:21.821617",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log b/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-deleted file mode 100644
-index 3f70132..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,831 DEBUG   MainThread:8815 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send():179] send: header
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:21,939 DEBUG   SenderThread:8815 [sender.py:send():179] send: run
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,090 DEBUG   SenderThread:8815 [sender.py:send():179] send: summary
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:23,092 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():39] meta init
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():53] meta init done
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:probe():210] probe
--2022-04-09 16:14:23,100 DEBUG   HandlerThread:8815 [meta.py:_setup_git():200] setup git
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_save_code():89] save code
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_code():110] save code done
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_patches():127] save patches
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_pip():57] save pip
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_conda():78] save conda
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,537 DEBUG   HandlerThread:8815 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:14:24,538 DEBUG   HandlerThread:8815 [meta.py:probe():252] probe done
--2022-04-09 16:14:24,539 DEBUG   SenderThread:8815 [sender.py:send():179] send: files
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,548 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:24,548 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:25,577 DEBUG   SenderThread:8815 [sender.py:send():179] send: config
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:40,579 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:40,579 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:51,743 DEBUG   SenderThread:8815 [sender.py:send():179] send: stats
--2022-04-09 16:14:56,424 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:56,424 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:15:01,820 DEBUG   SenderThread:8815 [sender.py:send():179] send: history
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug.log b/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-deleted file mode 100644
-index 99b6b97..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug.log
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():418] starting backend
--2022-04-09 16:14:21,828 INFO    MainThread:8815 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb b/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
-deleted file mode 100644
-index a4486ce..0000000
-Binary files a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py b/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml b/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/config.yaml b/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/diff.patch b/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-deleted file mode 100644
-index 9eddab1..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-+++ /dev/null
-@@ -1,560 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..353da1f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,249 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f0332eb 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..97853e9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7be71e2 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_162621-m83puhmm
--\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/output.log b/wandb/run-20220409_162621-m83puhmm/files/output.log
-deleted file mode 100644
-index ee1c9e3..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/output.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt b/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-deleted file mode 100644
-index 4ce8f76..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:56:22.902051",
--    "startedAt": "2022-04-09T10:56:21.924771",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log b/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-deleted file mode 100644
-index 7032449..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,934 DEBUG   MainThread:9280 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:26:21,937 DEBUG   SenderThread:9280 [sender.py:send():179] send: header
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:21,938 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,344 DEBUG   SenderThread:9280 [sender.py:send():179] send: run
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,889 DEBUG   SenderThread:9280 [sender.py:send():179] send: summary
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:22,895 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():39] meta init
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():53] meta init done
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:probe():210] probe
--2022-04-09 16:26:22,908 DEBUG   HandlerThread:9280 [meta.py:_setup_git():200] setup git
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_save_code():89] save code
--2022-04-09 16:26:22,972 DEBUG   HandlerThread:9280 [meta.py:_save_code():110] save code done
--2022-04-09 16:26:22,973 DEBUG   HandlerThread:9280 [meta.py:_save_patches():127] save patches
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():57] save pip
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_conda():78] save conda
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:probe():252] probe done
--2022-04-09 16:26:24,440 DEBUG   SenderThread:9280 [sender.py:send():179] send: files
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:24,448 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:24,898 DEBUG   SenderThread:9280 [sender.py:send():179] send: config
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:39,905 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:39,905 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:51,624 DEBUG   SenderThread:9280 [sender.py:send():179] send: stats
--2022-04-09 16:26:55,340 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:55,340 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:27:06,912 DEBUG   SenderThread:9280 [sender.py:send():179] send: history
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug.log b/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-deleted file mode 100644
-index 5053427..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():418] starting backend
--2022-04-09 16:26:21,931 INFO    MainThread:9280 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb b/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
-deleted file mode 100644
-index 978cbe5..0000000
-Binary files a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py b/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-deleted file mode 100644
-index 1988ff1..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 1
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 1
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch b/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-deleted file mode 100644
-index d503875..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-+++ /dev/null
-@@ -1,561 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..b0966e9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,250 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..1486dd6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..071678f 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..be8b91a 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf
--\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/output.log b/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-deleted file mode 100644
-index f4f17d5..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt b/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-deleted file mode 100644
-index 6c00633..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:09:01.944494",
--    "startedAt": "2022-04-09T12:09:01.199712",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-deleted file mode 100644
-index c0804b4..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5045.823547363281, "_runtime": 154, "_timestamp": 1649506295, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-deleted file mode 100644
-index 67f5897..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-+++ /dev/null
-@@ -1,418 +0,0 @@
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,208 DEBUG   MainThread:10760 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send():179] send: header
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,337 DEBUG   SenderThread:10760 [sender.py:send():179] send: run
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:01,942 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():39] meta init
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():53] meta init done
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:probe():210] probe
--2022-04-09 17:39:01,950 DEBUG   HandlerThread:10760 [meta.py:_setup_git():200] setup git
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_save_code():89] save code
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_code():110] save code done
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_patches():127] save patches
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():57] save pip
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_conda():78] save conda
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:probe():252] probe done
--2022-04-09 17:39:03,362 DEBUG   SenderThread:10760 [sender.py:send():179] send: files
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,372 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:03,372 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,822 DEBUG   SenderThread:10760 [sender.py:send():179] send: config
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:18,825 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:18,826 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:30,755 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:39:34,298 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:34,298 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:01,384 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:05,203 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:05,204 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,724 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:20,725 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,136 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:27,137 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:32,273 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:36,248 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:36,249 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:51,681 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:51,682 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:02,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,142 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:07,142 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:22,870 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:22,871 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:33,728 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,321 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:38,322 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: telemetry
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: exit
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():295] send defer
--2022-04-09 17:41:51,004 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,005 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,006 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,006 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 17:41:51,007 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,008 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,008 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 17:41:51,009 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 17:41:51,009 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,010 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 17:41:51,062 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,062 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:51,063 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,063 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 17:41:51,064 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,064 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 17:41:51,064 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 17:41:51,065 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,065 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 17:41:51,109 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,203 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:51,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:51,546 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 17:41:51,546 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,546 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,546 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,546 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 17:41:51,547 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 17:41:51,648 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt requirements.txt
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log output.log
--2022-04-09 17:41:52,208 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 17:41:52,209 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json wandb-summary.json
--2022-04-09 17:41:52,218 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml config.yaml
--2022-04-09 17:41:52,220 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch diff.patch
--2022-04-09 17:41:52,222 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py code/train_translation.py
--2022-04-09 17:41:52,224 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 17:41:52,224 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 17:41:52,225 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 17:41:52,225 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,226 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,226 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 17:41:52,328 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,842 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 17:41:52,842 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,844 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,844 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 17:41:52,845 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,846 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 17:41:52,848 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,848 DEBUG   SenderThread:10760 [sender.py:send():179] send: final
--2022-04-09 17:41:52,849 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 17:41:52,849 DEBUG   SenderThread:10760 [sender.py:send():179] send: footer
--2022-04-09 17:41:52,850 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,850 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 17:41:52,947 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,947 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,948 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,049 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,050 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,051 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 45730
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,153 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,153 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,155 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,256 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,257 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,258 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,360 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,361 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,362 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,464 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,465 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,466 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,502 INFO    Thread-33 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:53,504 INFO    Thread-29 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:41:53,512 INFO    Thread-32 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:53,524 INFO    Thread-31 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:41:53,568 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,568 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,569 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,671 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,672 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,673 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,775 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,776 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,777 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,879 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,879 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,881 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,983 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,983 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,984 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,033 INFO    Thread-30 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:54,086 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,087 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,088 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,190 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,190 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,192 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,294 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,294 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,294 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:54,295 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,297 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 17:41:54,299 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 17:41:54,302 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 17:41:54,302 INFO    HandlerThread:10760 [handler.py:finish():638] shutting down handler
--2022-04-09 17:41:54,849 INFO    WriterThread:10760 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [sender.py:finish():933] shutting down sender
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:55,308 INFO    MainThread:10760 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 17:41:55,309 INFO    MainThread:10760 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 17:41:55,310 INFO    MainThread:10760 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 17:41:55,323 INFO    MainThread:10760 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-deleted file mode 100644
-index 2ea4289..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-+++ /dev/null
-@@ -1,73 +0,0 @@
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():418] starting backend
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb b/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
-deleted file mode 100644
-index c939775..0000000
-Binary files a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py b/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml b/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/config.yaml b/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-deleted file mode 100644
-index 0b2ef04..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 24
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/diff.patch b/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-deleted file mode 100644
-index a6f8b6d..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-+++ /dev/null
-@@ -1,634 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e11eb21 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,302 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..a3e7597 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..453b7bc 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b2d6ded 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_175151-z44hpswp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/output.log b/wandb/run-20220409_175151-z44hpswp/files/output.log
-deleted file mode 100644
-index 2224687..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/output.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--translation model saved in checkpoint
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt b/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-deleted file mode 100644
-index e3bc5e0..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:21:52.829321",
--    "startedAt": "2022-04-09T12:21:51.786614",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=24",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-deleted file mode 100644
-index 4d8b4c3..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 107.22583770751953, "_runtime": 695, "_timestamp": 1649507606, "_step": 28, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log b/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-deleted file mode 100644
-index 552d2f2..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,620 +0,0 @@
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,796 DEBUG   MainThread:14720 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send():179] send: header
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,171 DEBUG   SenderThread:14720 [sender.py:send():179] send: run
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,825 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:52,827 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():39] meta init
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():53] meta init done
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:probe():210] probe
--2022-04-09 17:51:52,837 DEBUG   HandlerThread:14720 [meta.py:_setup_git():200] setup git
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_save_code():89] save code
--2022-04-09 17:51:52,876 DEBUG   HandlerThread:14720 [meta.py:_save_code():110] save code done
--2022-04-09 17:51:52,877 DEBUG   HandlerThread:14720 [meta.py:_save_patches():127] save patches
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():57] save pip
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_conda():78] save conda
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:probe():252] probe done
--2022-04-09 17:51:54,261 DEBUG   SenderThread:14720 [sender.py:send():179] send: files
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,272 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:51:54,272 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: config
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:09,721 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:09,721 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:21,569 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:25,148 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:25,149 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:52,213 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,140 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:56,140 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:11,596 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:11,597 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:23,054 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:27,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:27,074 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:42,499 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:42,500 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:53,596 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:57,929 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:57,929 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:59,413 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:59,414 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:13,359 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:13,359 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,344 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:54:20,345 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:24,527 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:28,793 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:28,793 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:44,227 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:44,227 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:55,062 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:59,653 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:59,653 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:11,338 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:11,339 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:15,098 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:15,099 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:25,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:30,519 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:30,519 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:45,955 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:45,956 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:56,468 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:01,589 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:17,078 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:17,078 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:27,343 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:56:32,522 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:32,522 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:47,961 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:47,961 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:57,925 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:03,390 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:03,390 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:18,853 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:18,853 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:28,552 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:34,280 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:34,280 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:49,734 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:49,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:59,325 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,341 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:05,342 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:20,790 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:20,790 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:29,955 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:36,214 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:36,214 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:51,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:51,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:00,845 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:07,147 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:07,147 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:22,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:22,588 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:31,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:38,008 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:38,008 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:53,449 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:53,450 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:02,140 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:08,884 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:08,884 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:13,617 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:13,618 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:24,366 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:24,367 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:32,786 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:39,806 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:39,806 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,224 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:55,225 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,715 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:00,716 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:03,610 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:10,649 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:10,649 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:26,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:26,073 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:34,217 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:41,491 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:41,492 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,993 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:43,994 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:56,918 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:56,918 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:04,763 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:12,340 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:12,340 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:35,408 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:43,201 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:43,201 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:44,434 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:44,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:58,647 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:58,647 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:06,291 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:14,117 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:14,117 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,051 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:26,052 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:29,557 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:29,559 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:36,939 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:03:42,324 INFO    MainThread:14720 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:03:43,079 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:43,080 DEBUG   SenderThread:14720 [sender.py:send():179] send: telemetry
--2022-04-09 18:03:43,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:43,580 DEBUG   SenderThread:14720 [sender.py:send():179] send: exit
--2022-04-09 18:03:43,580 INFO    SenderThread:14720 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:03:43,581 INFO    SenderThread:14720 [sender.py:send_exit():295] send defer
--2022-04-09 18:03:43,581 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:43,582 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,583 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:03:43,583 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:03:43,584 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 48639
--}
--
--2022-04-09 18:03:43,585 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,586 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:03:43,657 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,657 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:43,658 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,658 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:03:43,660 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,660 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:03:43,686 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:44,248 INFO    SenderThread:14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt requirements.txt
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log output.log
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml config.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch diff.patch
--2022-04-09 18:03:44,251 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:03:44,253 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:03:44,253 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,254 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,258 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:03:44,260 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,260 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:03:44,261 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,261 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:03:44,261 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,261 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:03:44,361 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,907 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:03:44,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,908 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,908 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:03:44,909 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,909 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:03:44,910 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,910 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: final
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: footer
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,911 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:03:45,010 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,011 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,012 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,115 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,116 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,117 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,219 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,219 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,221 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,323 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,323 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,325 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,427 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,427 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,428 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,466 INFO    Thread-54 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 18:03:45,472 INFO    Thread-52 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 18:03:45,476 INFO    Thread-53 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:45,530 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,531 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,532 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,636 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,738 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,739 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,740 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,842 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,842 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,844 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,946 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,946 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,948 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,050 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,051 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,053 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,155 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,156 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,157 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,184 INFO    Thread-56 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:46,188 INFO    Thread-55 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:46,259 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,259 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,261 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,363 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,364 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,365 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,468 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,469 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,469 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:46,470 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,472 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:03:46,474 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:03:46,477 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:03:46,478 INFO    HandlerThread:14720 [handler.py:finish():638] shutting down handler
--2022-04-09 18:03:46,911 INFO    WriterThread:14720 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 18:03:47,469 INFO    SenderThread:14720 [sender.py:finish():933] shutting down sender
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:47,483 INFO    MainThread:14720 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:03:47,484 INFO    MainThread:14720 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:03:47,485 INFO    MainThread:14720 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:03:47,525 INFO    MainThread:14720 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug.log b/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-deleted file mode 100644
-index bb769fe..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-+++ /dev/null
-@@ -1,140 +0,0 @@
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'z44hpswp', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-z44hpswp.yaml', 'start_method': 'thread'}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug.log
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 24, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():418] starting backend
--2022-04-09 17:51:51,793 INFO    MainThread:14720 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
-diff --git a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb b/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
-deleted file mode 100644
-index 55f1aff..0000000
-Binary files a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py b/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml b/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml b/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-deleted file mode 100644
-index 194d831..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch b/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-deleted file mode 100644
-index 979dcc5..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-+++ /dev/null
-@@ -1,645 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..42fbde8 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,313 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..371ace5 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..a6d9884 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..705068b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z
--\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/output.log b/wandb/run-20220409_180353-vjrenr4z/files/output.log
-deleted file mode 100644
-index a2bf91c..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/output.log
-+++ /dev/null
-@@ -1,102 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--translation model saved in checkpoint
--{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--translation model saved in checkpoint
--{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--translation model saved in checkpoint
--{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--translation model saved in checkpoint
--{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--translation model saved in checkpoint
--{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--translation model saved in checkpoint
--{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--translation model saved in checkpoint
--{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--translation model saved in checkpoint
--{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--translation model saved in checkpoint
--{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--translation model saved in checkpoint
--{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--translation model saved in checkpoint
--{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--translation model saved in checkpoint
--{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--translation model saved in checkpoint
--{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--translation model saved in checkpoint
--{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--translation model saved in checkpoint
--{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--translation model saved in checkpoint
--{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--translation model saved in checkpoint
--{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--translation model saved in checkpoint
--{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--translation model saved in checkpoint
--{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--translation model saved in checkpoint
--{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--translation model saved in checkpoint
--{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--translation model saved in checkpoint
--{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--translation model saved in checkpoint
--{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--translation model saved in checkpoint
--{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--translation model saved in checkpoint
--{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--translation model saved in checkpoint
--{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--translation model saved in checkpoint
--{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--translation model saved in checkpoint
--{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--translation model saved in checkpoint
--{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--translation model saved in checkpoint
--{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--translation model saved in checkpoint
--{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--translation model saved in checkpoint
--{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--translation model saved in checkpoint
--{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--translation model saved in checkpoint
--{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--translation model saved in checkpoint
--{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt b/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-deleted file mode 100644
-index 3e24107..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:33:55.138080",
--    "startedAt": "2022-04-09T12:33:53.912960",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=40",
--        "--nhead=4",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-deleted file mode 100644
-index dbd5bb9..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 571.8498382568359, "_runtime": 1394, "_timestamp": 1649509027, "_step": 47, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-deleted file mode 100644
-index 6ac5722..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,809 +0,0 @@
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,947 DEBUG   MainThread:18842 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 DEBUG   SenderThread:18842 [sender.py:send():179] send: header
--2022-04-09 18:03:53,957 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:53,958 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:54,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: run
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:55,130 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():39] meta init
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():53] meta init done
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:probe():210] probe
--2022-04-09 18:03:55,146 DEBUG   HandlerThread:18842 [meta.py:_setup_git():200] setup git
--2022-04-09 18:03:55,213 DEBUG   HandlerThread:18842 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:03:55,214 DEBUG   HandlerThread:18842 [meta.py:_save_code():89] save code
--2022-04-09 18:03:55,241 DEBUG   HandlerThread:18842 [meta.py:_save_code():110] save code done
--2022-04-09 18:03:55,242 DEBUG   HandlerThread:18842 [meta.py:_save_patches():127] save patches
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():57] save pip
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_conda():78] save conda
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,710 DEBUG   HandlerThread:18842 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:03:56,711 DEBUG   HandlerThread:18842 [meta.py:probe():252] probe done
--2022-04-09 18:03:56,713 DEBUG   SenderThread:18842 [sender.py:send():179] send: files
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,723 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:56,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: config
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:12,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:12,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:23,959 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:27,637 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:27,637 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:43,070 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:43,071 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:54,578 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:58,609 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:58,609 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,096 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:14,096 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:25,318 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:05:29,536 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:29,536 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,041 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:45,042 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:55,878 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:00,385 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:00,385 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,115 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:12,116 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:15,812 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:15,812 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:26,509 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:31,252 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:31,252 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:46,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:46,699 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:57,088 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:02,128 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:02,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:17,560 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:17,560 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:27,788 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:33,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:33,039 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:58,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:03,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:03,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:10,495 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:10,496 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,773 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:16,774 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:19,358 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:19,358 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:29,127 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:34,827 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:34,827 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:50,258 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:50,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:59,791 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:05,625 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:05,625 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:21,079 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:21,079 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:30,544 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:36,425 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:36,426 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,629 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:37,630 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:51,758 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:51,758 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:01,192 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:07,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:22,576 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:22,576 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,752 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:37,928 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:37,928 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:02,406 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:08,610 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:08,610 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:23,966 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:23,966 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:33,001 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:39,600 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:39,600 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:54,944 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:54,944 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:03,627 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:10,280 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:10,280 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:25,635 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:25,635 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:34,297 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:40,989 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:40,989 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:56,322 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:56,323 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:05,226 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:11,687 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:11,687 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:27,035 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:27,035 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:35,749 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:42,474 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:42,475 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:57,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:06,507 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:13,240 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:13,240 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,985 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:26,986 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:28,667 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:28,668 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:37,148 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:44,310 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:44,310 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:59,666 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:59,666 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:07,695 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:14,998 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:14,998 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:30,334 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:30,334 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:38,429 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:45,673 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:45,673 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:01,020 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:01,020 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:09,031 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:16,349 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:16,349 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:39,689 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:47,261 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:47,261 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:02,605 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:02,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:10,351 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:17,935 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:17,935 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:33,308 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:33,308 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,998 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:44,097 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:44,098 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:48,657 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:48,817 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:11,869 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:20,065 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:20,065 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,258 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:50,780 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:50,780 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:06,176 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:06,176 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:12,884 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:21,533 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:21,533 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:43,542 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:52,222 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:52,222 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:07,575 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:07,575 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:14,395 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:22,919 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:22,920 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:38,284 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:38,284 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:44,947 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:53,719 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:53,719 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:09,154 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:09,154 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:15,554 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:24,513 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:24,513 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,048 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:32,049 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:39,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:39,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:46,176 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:55,292 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:55,292 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:10,678 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:10,679 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:16,761 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:26,337 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:26,337 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:41,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:41,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:43,842 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:43,843 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:47,574 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:57,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:57,038 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:12,473 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:12,473 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:18,151 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:27,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:27,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:43,266 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:43,266 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:48,907 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:58,729 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:58,729 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,447 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:03,448 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:14,167 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:14,167 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:19,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:29,519 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:29,520 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:44,877 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:44,877 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:50,128 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:00,259 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:00,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:20,792 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:30,948 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:30,948 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,976 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:38,977 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:46,374 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:46,374 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:51,548 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:01,722 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:01,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:03,261 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:03,262 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:17,072 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:17,072 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:22,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:26:32,410 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:32,411 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:47,810 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:47,810 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:52,753 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,241 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:03,241 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:18,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:18,700 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:23,342 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:34,106 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:34,107 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
--2022-04-09 18:27:39,696 INFO    MainThread:18842 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:27:39,697 INFO    MainThread:18842 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:27:40,003 DEBUG   SenderThread:18842 [sender.py:send():179] send: telemetry
--2022-04-09 18:27:40,004 DEBUG   SenderThread:18842 [sender.py:send():179] send: exit
--2022-04-09 18:27:40,005 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,005 INFO    SenderThread:18842 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:27:40,006 INFO    SenderThread:18842 [sender.py:send_exit():295] send defer
--2022-04-09 18:27:40,006 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,008 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,008 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:27:40,008 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,010 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:27:40,011 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,011 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:40,067 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,067 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:27:40,069 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,069 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:27:40,110 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:40,461 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:27:40,462 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,463 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,464 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:27:40,464 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,465 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,465 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:27:40,466 INFO    SenderThread:18842 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:27:40,566 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:41,202 INFO    SenderThread:18842 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:27:41,205 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt requirements.txt
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log output.log
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:27:41,207 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml config.yaml
--2022-04-09 18:27:41,211 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch diff.patch
--2022-04-09 18:27:41,220 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:27:41,223 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:27:41,224 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,225 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,225 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:27:41,225 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,226 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,226 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:27:41,230 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:41,231 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:27:41,232 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,232 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:27:41,232 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,232 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:27:41,332 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,915 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:27:41,915 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,917 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,917 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:27:41,918 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,919 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:27:41,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,921 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:27:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: final
--2022-04-09 18:27:41,922 DEBUG   SenderThread:18842 [sender.py:send():179] send: footer
--2022-04-09 18:27:41,923 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,923 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:27:42,024 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,024 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,025 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,127 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,129 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,231 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,231 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,233 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,335 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,335 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,336 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,438 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,439 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,440 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,542 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,542 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,544 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,592 INFO    Thread-73 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:27:42,594 INFO    Thread-71 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:27:42,599 INFO    Thread-75 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:42,601 INFO    Thread-72 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:42,602 INFO    Thread-74 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:42,645 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,645 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,646 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,747 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,748 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,749 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,851 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,851 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,852 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:42,853 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,855 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:27:42,857 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:27:42,860 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:27:42,861 INFO    HandlerThread:18842 [handler.py:finish():638] shutting down handler
--2022-04-09 18:27:42,922 INFO    WriterThread:18842 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:27:43,852 INFO    SenderThread:18842 [sender.py:finish():933] shutting down sender
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:27:43,868 INFO    MainThread:18842 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:27:43,884 INFO    MainThread:18842 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-deleted file mode 100644
-index 55b000f..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-+++ /dev/null
-@@ -1,230 +0,0 @@
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'vjrenr4z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml', 'start_method': 'thread'}
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:53,921 INFO    MainThread:18842 [wandb_init.py:init():418] starting backend
--2022-04-09 18:03:53,941 INFO    MainThread:18842 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:03:53,943 INFO    MainThread:18842 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
-diff --git a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb b/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
-deleted file mode 100644
-index 2a205f7..0000000
-Binary files a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py b/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml b/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_182749-paufev36/files/config.yaml b/wandb/run-20220409_182749-paufev36/files/config.yaml
-deleted file mode 100644
-index c4a0d20..0000000
---- a/wandb/run-20220409_182749-paufev36/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_182749-paufev36/files/diff.patch b/wandb/run-20220409_182749-paufev36/files/diff.patch
-deleted file mode 100644
-index 17f6c34..0000000
---- a/wandb/run-20220409_182749-paufev36/files/diff.patch
-+++ /dev/null
-@@ -1,694 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e8bd4e3 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,362 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--+{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--+{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--+{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--+{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--+{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--+{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--+{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--+{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--+{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--+{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--+{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--+{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--+{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--+{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--+{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--+{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--+{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--+{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--+{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--+{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--+{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--+{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--+{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--+{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--+{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--+{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--+{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--+{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--+{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--+{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--+{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--+{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--+{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--+{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--+{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--+{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--+{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--+{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--+{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--+{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--+{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--+{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--+{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--+{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--+{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--+{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..6163657 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..7d0f5dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f11d588 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_182749-paufev36
--\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/output.log b/wandb/run-20220409_182749-paufev36/files/output.log
-deleted file mode 100644
-index 8a30e30..0000000
---- a/wandb/run-20220409_182749-paufev36/files/output.log
-+++ /dev/null
-@@ -1,55 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.115720272064209, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 202.97476196289062, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 151.204345703125, "time": 62}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Exception in thread Thread-16:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220409_182749-paufev36/files/requirements.txt b/wandb/run-20220409_182749-paufev36/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_182749-paufev36/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json b/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-deleted file mode 100644
-index ee6c1fa..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:57:50.039943",
--    "startedAt": "2022-04-09T12:57:49.399103",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json b/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-deleted file mode 100644
-index 6be8521..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 287.689208984375, "_runtime": 137, "_timestamp": 1649509206, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log b/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-deleted file mode 100644
-index ade12de..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-+++ /dev/null
-@@ -1,141 +0,0 @@
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,431 DEBUG   MainThread:25755 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send():179] send: header
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,435 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:49,589 DEBUG   SenderThread:25755 [sender.py:send():179] send: run
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:50,037 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():39] meta init
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():53] meta init done
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:probe():210] probe
--2022-04-09 18:27:50,045 DEBUG   HandlerThread:25755 [meta.py:_setup_git():200] setup git
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_save_code():89] save code
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_code():110] save code done
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_patches():127] save patches
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_pip():57] save pip
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_conda():78] save conda
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:probe():252] probe done
--2022-04-09 18:27:51,519 DEBUG   SenderThread:25755 [sender.py:send():179] send: files
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,530 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:51,530 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:51,872 DEBUG   SenderThread:25755 [sender.py:send():179] send: config
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:06,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:18,996 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,208 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:22,208 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:49,672 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:53,002 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:53,002 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,936 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:00,937 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:08,453 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:08,454 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:20,345 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:23,787 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:23,787 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:39,186 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:39,186 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:51,270 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:54,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:54,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:10,343 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:30:10,343 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug.log b/wandb/run-20220409_182749-paufev36/logs/debug.log
-deleted file mode 100644
-index 7b0f79c..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug.log
-+++ /dev/null
-@@ -1,92 +0,0 @@
--2022-04-09 18:27:49,403 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'paufev36', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-paufev36.yaml', 'start_method': 'thread'}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():418] starting backend
--2022-04-09 18:27:49,427 INFO    MainThread:25755 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:27:49,429 INFO    MainThread:25755 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb b/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
-deleted file mode 100644
-index 70babdb..0000000
-Binary files a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb and /dev/null differ
-diff --git a/wandb/sweep-1t9pc38r/config-paufev36.yaml b/wandb/sweep-1t9pc38r/config-paufev36.yaml
-deleted file mode 100644
-index da3e8b2..0000000
---- a/wandb/sweep-1t9pc38r/config-paufev36.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml b/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-deleted file mode 100644
-index d68afea..0000000
---- a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml b/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-deleted file mode 100644
-index cc3235e..0000000
---- a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml b/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-deleted file mode 100644
-index 24fc0f6..0000000
---- a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml b/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-deleted file mode 100644
-index eeb3936..0000000
---- a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml b/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-deleted file mode 100644
-index f88591e..0000000
---- a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-abict4v2.yaml b/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-deleted file mode 100644
-index 1b97c5e..0000000
---- a/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 20
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml b/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-deleted file mode 100644
-index 426c8ac..0000000
---- a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml b/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-deleted file mode 100644
-index caf5f78..0000000
---- a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml b/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-deleted file mode 100644
-index 6b7d3c1..0000000
---- a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml b/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-deleted file mode 100644
-index 8f11b7e..0000000
---- a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml b/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-deleted file mode 100644
-index d3a2560..0000000
---- a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml b/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-deleted file mode 100644
-index 403014d..0000000
---- a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 512
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml b/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-deleted file mode 100644
-index d1bf3d8..0000000
---- a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 40
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml b/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-deleted file mode 100644
-index 258ae0c..0000000
---- a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml b/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-deleted file mode 100644
-index dbe827a..0000000
---- a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml b/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-deleted file mode 100644
-index 3aeb285..0000000
---- a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml b/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-deleted file mode 100644
-index ccb6734..0000000
---- a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-gjih072d.yaml b/wandb/sweep-yoroy32u/config-gjih072d.yaml
-deleted file mode 100644
-index 73e8e4c..0000000
---- a/wandb/sweep-yoroy32u/config-gjih072d.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml b/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-deleted file mode 100644
-index 9d822c0..0000000
---- a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml b/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-deleted file mode 100644
-index f0bd5df..0000000
---- a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-uh7twoim.yaml b/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-deleted file mode 100644
-index 508d9e2..0000000
---- a/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml b/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-deleted file mode 100644
-index 83311a7..0000000
---- a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml b/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-deleted file mode 100644
-index 4f6dc35..0000000
---- a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--lambd:
--  value: 0.4
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-padai7jf.yaml b/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-deleted file mode 100644
-index 9b19315..0000000
---- a/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--lambd:
--  value: 0.55
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml b/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-deleted file mode 100644
-index 8a8a9b2..0000000
---- a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 256
--epochs:
--  value: 24
--lambd:
--  value: 0.2
--nhead:
--  value: 2
--nlayers:
--  value: 4
diff --git a/wandb/run-20220416_014133-qw6te5do/files/output.log b/wandb/run-20220416_014133-qw6te5do/files/output.log
deleted file mode 100644
index 2515324..0000000
--- a/wandb/run-20220416_014133-qw6te5do/files/output.log
+++ /dev/null
@@ -1,90 +0,0 @@
-
-train_translation.py --load=0
-Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
-Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
-- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-{"epoch": 0, "step": 0, "loss": 7.128603458404541, "time": 5}
-{"epoch": 0, "step": 5, "loss": 156.04449462890625, "time": 5}
-/home/ivlabs/context_enhancement/context_new/new/context_enhancement/train_translation.py:275: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
-  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-{"epoch": 0, "step": 10, "loss": 154.7353515625, "time": 6}
-translation model saved in checkpoint
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([10103, 13054, 10108, 37727, 10104, 10372, 11913, 10127, 11053,   119,
-          102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([10125, 10103, 29263, 11280, 21152, 10108, 10103, 16451, 14086,   117,
-        11312, 14693, 10173, 54633, 10150, 10110, 29605, 10142, 10104, 10103,
-        11134, 13896, 11523, 14650, 10346, 10103, 15152, 10139, 14299, 57616,
-        14666,   131, 10103, 20202,   117, 12851, 37727, 10110, 45430,   119,
-          102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([21113, 10127,   143, 12050, 11913, 10139, 24850,   119,   102],
-       device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([79481, 11229, 10346, 14356, 20550, 10139, 29785, 14262, 10110, 42136,
-        12090, 32837, 10104, 13214, 10982, 16993, 52378, 10320, 85197, 10285,
-        71132,   119,   102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([10844, 10104, 10103, 22151, 13170,   117, 79481, 11229, 10346, 14356,
-        20550, 10139, 10144, 28194, 23209, 10108, 10103, 22389, 10472,   119,
-          102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([123, 119, 102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([124, 119, 102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([43959, 10139, 13498,   117, 11497, 10110, 13044, 19394, 10107, 14975,
-        10551, 40127, 11229, 10346, 19164,   119,   102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([21113,   112,   161, 12763, 16894, 10438, 31377, 47461, 10563, 10104,
-        16769, 10868,   119,   102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([69066, 10139, 10103, 19569, 10110, 10103, 96237, 14650, 14989, 22107,
-        57616, 10104, 10367, 20532,   119,   102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([10103, 35040, 11312, 55257, 10104, 10103, 12878, 10916, 10868, 11229,
-        19524, 10487, 11982, 10125, 57616, 10104, 10372, 11913, 10770, 10103,
-        29468, 10114, 10695,   119,   102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([10104, 10372, 27195,   117, 10103, 35458, 10108, 12851, 37727,   117,
-        45430,   117,   143, 12050, 14149, 19569, 10110, 13293, 11168, 24264,
-        11229, 10346, 14758, 17156,   119,   102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([  143, 33533, 10108, 10103, 73444, 18116,   118, 22389, 17593, 10104,
-        10595, 10151,   124, 43689, 12819, 11229, 10346, 21509, 10104, 10103,
-        10403, 11125, 10139, 12851,   118, 10573, 31176,   119,   102],
-       device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([10103, 11594,   112,   161, 22853, 13651, 59343, 10114, 32097, 52958,
-        10203, 29263, 68350, 10107, 13208, 13594, 36616, 14094, 19382, 10125,
-        10103, 19569,   119,   102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([15929, 44909, 77949, 10503, 12325, 10103, 12485, 10285, 12238, 14650,
-        10346, 31377,   119,   102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([10103, 34763, 10127, 10114, 85270, 65343, 10218, 11497, 10110, 53938,
-          119,   102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([42416, 72829, 10108, 10246, 18454, 76601, 11229, 10346, 17200,   119,
-          102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([34321, 11229, 10346, 15227, 10114, 30562, 10103, 18785, 95044, 12705,
-        10108, 10103, 11481, 34029,   119,   102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([  125,   119, 15636, 10110, 35054, 11229, 14989, 22107, 11232,   119,
-          102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([10197, 11229, 10346, 78832, 10171, 22418, 14856, 10110, 21516, 19771,
-          119,   102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([10668, 10348,   118, 14370, 12325, 11865, 10110, 12077, 10127, 19641,
-        43131, 12652,   119,   102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([35645, 42888, 10123, 14358, 10104, 14149, 10287, 10110, 27089, 14194,
-        12315, 11229, 11923, 10144, 12652, 11892, 10104, 10372, 27195,   119,
-          102], device='cuda:0')
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted tensor([10770, 10372, 22151, 13170,   117, 33189, 10125,   143, 23676, 14463,
-        10108, 10482, 28781, 10171, 11498, 11229, 10346, 41755, 22117,   119,
diff --git a/wandb/run-20220416_014133-qw6te5do/files/requirements.txt b/wandb/run-20220416_014133-qw6te5do/files/requirements.txt
deleted file mode 100644
index 5ddce70..0000000
--- a/wandb/run-20220416_014133-qw6te5do/files/requirements.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-aiohttp==3.8.1
-aiosignal==1.2.0
-antlr4-python3-runtime==4.8
-async-timeout==4.0.2
-asynctest==0.13.0
-attrs==21.4.0
-backcall==0.2.0
-bitarray==2.4.1
-blessings==1.7
-brotlipy==0.7.0
-certifi==2021.10.8
-cffi==1.15.0
-charset-normalizer==2.0.12
-click==8.0.4
-colorama==0.4.4
-configparser==5.2.0
-cryptography==36.0.0
-cython==0.29.28
-datasets==1.16.1
-debugpy==1.6.0
-decorator==5.1.1
-dill==0.3.4
-docker-pycreds==0.4.0
-entrypoints==0.4
-fairseq==1.0.0a0
-fastbpe==0.1.0
-filelock==3.6.0
-frozenlist==1.3.0
-fsspec==2022.2.0
-gitdb==4.0.9
-gitpython==3.1.27
-gpustat==0.6.0
-huggingface-hub==0.4.0
-hydra-core==1.0.7
-idna==3.3
-importlib-metadata==4.11.3
-importlib-resources==5.6.0
-ipykernel==6.12.1
-ipython==7.32.0
-jedi==0.18.1
-joblib==1.1.0
-jupyter-client==7.2.2
-jupyter-core==4.9.2
-matplotlib-inline==0.1.3
-mkl-fft==1.3.1
-mkl-random==1.2.2
-mkl-service==2.4.0
-mock==4.0.3
-multidict==6.0.2
-multiprocess==0.70.12.2
-nest-asyncio==1.5.5
-numpy==1.21.5
-nvidia-ml-py3==7.352.0
-omegaconf==2.0.6
-packaging==21.3
-pandas==1.3.5
-parso==0.8.3
-pathtools==0.1.2
-pexpect==4.8.0
-pickleshare==0.7.5
-pillow==9.0.1
-pip==21.2.2
-portalocker==2.4.0
-promise==2.3
-prompt-toolkit==3.0.29
-protobuf==3.19.4
-psutil==5.9.0
-ptyprocess==0.7.0
-pyarrow==7.0.0
-pycparser==2.21
-pygments==2.11.2
-pyopenssl==22.0.0
-pyparsing==3.0.7
-pysocks==1.7.1
-python-dateutil==2.8.2
-pytz==2022.1
-pyyaml==6.0
-pyzmq==22.3.0
-regex==2022.3.15
-requests==2.27.1
-sacrebleu==2.0.0
-sacremoses==0.0.49
-sentry-sdk==1.5.8
-setuptools==58.0.4
-shortuuid==1.0.8
-six==1.16.0
-smmap==5.0.0
-subprocess32==3.5.4
-subword-nmt==0.3.8
-tabulate==0.8.9
-tokenizers==0.10.3
-torch==1.11.0
-torchaudio==0.11.0
-torchtext==0.12.0
-torchvision==0.12.0
-tornado==6.1
-tqdm==4.63.1
-traitlets==5.1.1
-transformers==4.14.1
-typing-extensions==4.1.1
-urllib3==1.26.9
-wandb==0.10.31
-wcwidth==0.2.5
-wheel==0.37.1
-xxhash==3.0.0
-yarl==1.7.2
-zipp==3.7.0
\ No newline at end of file
diff --git a/wandb/run-20220416_014133-qw6te5do/files/wandb-metadata.json b/wandb/run-20220416_014133-qw6te5do/files/wandb-metadata.json
deleted file mode 100644
index 9966d97..0000000
--- a/wandb/run-20220416_014133-qw6te5do/files/wandb-metadata.json
+++ /dev/null
@@ -1,26 +0,0 @@
-{
-    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
-    "python": "3.7.11",
-    "heartbeatAt": "2022-04-15T20:11:34.454604",
-    "startedAt": "2022-04-15T20:11:33.272426",
-    "docker": null,
-    "gpu": "NVIDIA GeForce GTX 1080 Ti",
-    "gpu_count": 2,
-    "cpu_count": 8,
-    "cuda": null,
-    "args": [
-        "--load=0"
-    ],
-    "state": "running",
-    "program": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement/train_translation.py",
-    "codePath": "train_translation.py",
-    "git": {
-        "remote": "https://github.com/IvLabs/context_enhancement.git",
-        "commit": "3f7c03274d50f816db3079adcb4d4125620373b6"
-    },
-    "email": "aneeshashetye@gmail.com",
-    "root": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement",
-    "host": "hubble-02",
-    "username": "ivlabs",
-    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
-}
diff --git a/wandb/run-20220416_014133-qw6te5do/files/wandb-summary.json b/wandb/run-20220416_014133-qw6te5do/files/wandb-summary.json
deleted file mode 100644
index b7216e0..0000000
--- a/wandb/run-20220416_014133-qw6te5do/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"epoch_loss": 137.94474399089813, "_runtime": 15, "_timestamp": 1650053508, "_step": 0}
\ No newline at end of file
diff --git a/wandb/run-20220416_014133-qw6te5do/logs/debug-internal.log b/wandb/run-20220416_014133-qw6te5do/logs/debug-internal.log
deleted file mode 100644
index a91c8d3..0000000
--- a/wandb/run-20220416_014133-qw6te5do/logs/debug-internal.log
+++ /dev/null
@@ -1,84 +0,0 @@
-2022-04-16 01:41:33,302 INFO    MainThread:6469 [backend.py:ensure_launched():137] started backend process with pid: 0
-2022-04-16 01:41:33,304 INFO    wandb_internal:6469 [internal.py:wandb_internal():91] W&B internal server running at pid: 6469, started at: 2022-04-16 01:41:33.301961
-2022-04-16 01:41:33,305 INFO    MainThread:6469 [wandb_init.py:init():423] backend started and connected
-2022-04-16 01:41:33,305 DEBUG   MainThread:6469 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
-2022-04-16 01:41:33,307 INFO    MainThread:6469 [wandb_init.py:init():465] updated telemetry
-2022-04-16 01:41:33,309 INFO    MainThread:6469 [wandb_init.py:init():484] communicating current version
-2022-04-16 01:41:33,312 INFO    WriterThread:6469 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/run-qw6te5do.wandb
-2022-04-16 01:41:33,314 DEBUG   SenderThread:6469 [sender.py:send():179] send: header
-2022-04-16 01:41:33,314 DEBUG   HandlerThread:6469 [handler.py:handle_request():124] handle_request: check_version
-2022-04-16 01:41:33,315 DEBUG   SenderThread:6469 [sender.py:send_request():193] send_request: check_version
-2022-04-16 01:41:33,652 INFO    MainThread:6469 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-16 01:41:33,653 INFO    MainThread:6469 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-16 01:41:33,656 DEBUG   SenderThread:6469 [sender.py:send():179] send: run
-2022-04-16 01:41:34,451 INFO    MainThread:6469 [wandb_init.py:init():522] starting run threads in backend
-2022-04-16 01:41:34,451 INFO    SenderThread:6469 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files
-2022-04-16 01:41:34,451 INFO    SenderThread:6469 [sender.py:_start_run_threads():707] run started: qw6te5do with start time 1650053493
-2022-04-16 01:41:34,451 DEBUG   SenderThread:6469 [sender.py:send():179] send: summary
-2022-04-16 01:41:34,452 DEBUG   HandlerThread:6469 [handler.py:handle_request():124] handle_request: run_start
-2022-04-16 01:41:34,452 INFO    SenderThread:6469 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:41:34,454 DEBUG   HandlerThread:6469 [meta.py:__init__():39] meta init
-2022-04-16 01:41:34,454 DEBUG   HandlerThread:6469 [meta.py:__init__():53] meta init done
-2022-04-16 01:41:34,454 DEBUG   HandlerThread:6469 [meta.py:probe():210] probe
-2022-04-16 01:41:34,460 DEBUG   HandlerThread:6469 [meta.py:_setup_git():200] setup git
-2022-04-16 01:41:34,480 DEBUG   HandlerThread:6469 [meta.py:_setup_git():207] setup git done
-2022-04-16 01:41:34,481 DEBUG   HandlerThread:6469 [meta.py:_save_code():89] save code
-2022-04-16 01:41:34,489 DEBUG   HandlerThread:6469 [meta.py:_save_code():110] save code done
-2022-04-16 01:41:34,489 DEBUG   HandlerThread:6469 [meta.py:_save_patches():127] save patches
-2022-04-16 01:41:34,554 DEBUG   HandlerThread:6469 [meta.py:_save_patches():169] save patches done
-2022-04-16 01:41:34,554 DEBUG   HandlerThread:6469 [meta.py:_save_pip():57] save pip
-2022-04-16 01:41:34,554 DEBUG   HandlerThread:6469 [meta.py:_save_pip():71] save pip done
-2022-04-16 01:41:34,554 DEBUG   HandlerThread:6469 [meta.py:_save_conda():78] save conda
-2022-04-16 01:41:35,452 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/wandb-summary.json
-2022-04-16 01:41:35,452 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/conda-environment.yaml
-2022-04-16 01:41:35,452 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/code/train_translation.py
-2022-04-16 01:41:35,453 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/requirements.txt
-2022-04-16 01:41:35,453 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/diff.patch
-2022-04-16 01:41:35,453 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/code
-2022-04-16 01:41:36,139 DEBUG   HandlerThread:6469 [meta.py:_save_conda():86] save conda done
-2022-04-16 01:41:36,139 DEBUG   HandlerThread:6469 [meta.py:probe():252] probe done
-2022-04-16 01:41:36,141 DEBUG   SenderThread:6469 [sender.py:send():179] send: files
-2022-04-16 01:41:36,141 INFO    SenderThread:6469 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-16 01:41:36,142 INFO    SenderThread:6469 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-16 01:41:36,142 INFO    SenderThread:6469 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-16 01:41:36,149 INFO    MainThread:6469 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-16 01:41:36,150 DEBUG   HandlerThread:6469 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:41:36,150 DEBUG   SenderThread:6469 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:41:36,150 INFO    MainThread:6469 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-16 01:41:36,151 INFO    MainThread:6469 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-16 01:41:36,152 INFO    MainThread:6469 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-16 01:41:36,152 INFO    MainThread:6469 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-16 01:41:36,152 INFO    MainThread:6469 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 10, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-16 01:41:36,451 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/conda-environment.yaml
-2022-04-16 01:41:36,451 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/wandb-metadata.json
-2022-04-16 01:41:36,452 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:41:36,837 DEBUG   SenderThread:6469 [sender.py:send():179] send: config
-2022-04-16 01:41:37,884 INFO    Thread-14 :6469 [upload_job.py:push():133] Uploaded file /tmp/tmp7a2m2v__wandb/2wnhls28-wandb-metadata.json
-2022-04-16 01:41:38,099 INFO    Thread-15 :6469 [upload_job.py:push():133] Uploaded file /tmp/tmp7a2m2v__wandb/2wpqbnqv-code/train_translation.py
-2022-04-16 01:41:38,452 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:41:38,856 INFO    Thread-22 :6469 [upload_job.py:push():133] Uploaded file /tmp/tmp7a2m2v__wandb/2jnxx1qb-diff.patch
-2022-04-16 01:41:39,452 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/config.yaml
-2022-04-16 01:41:40,452 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:41:48,279 DEBUG   SenderThread:6469 [sender.py:send():179] send: history
-2022-04-16 01:41:48,279 DEBUG   SenderThread:6469 [sender.py:send():179] send: summary
-2022-04-16 01:41:48,279 INFO    SenderThread:6469 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:41:48,461 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/wandb-summary.json
-2022-04-16 01:41:48,461 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:41:50,462 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:41:51,840 DEBUG   HandlerThread:6469 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:41:51,840 DEBUG   SenderThread:6469 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:42:02,488 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:42:03,061 DEBUG   SenderThread:6469 [sender.py:send():179] send: stats
-2022-04-16 01:42:06,489 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:42:07,554 DEBUG   HandlerThread:6469 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:42:07,554 DEBUG   SenderThread:6469 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:42:08,489 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:42:10,490 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:42:12,491 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:42:14,492 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:42:15,200 INFO    MainThread:6469 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
-2022-04-16 01:42:15,200 INFO    MainThread:6469 [wandb_run.py:_restore():1480] restore
-2022-04-16 01:42:15,200 INFO    SenderThread:6469 [sender.py:finish():933] shutting down sender
-2022-04-16 01:42:15,200 INFO    SenderThread:6469 [dir_watcher.py:finish():282] shutting down directory watcher
-2022-04-16 01:42:15,200 INFO    WriterThread:6469 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/run-qw6te5do.wandb
diff --git a/wandb/run-20220416_014133-qw6te5do/logs/debug.log b/wandb/run-20220416_014133-qw6te5do/logs/debug.log
deleted file mode 100644
index 76ddcd1..0000000
--- a/wandb/run-20220416_014133-qw6te5do/logs/debug.log
+++ /dev/null
@@ -1,61 +0,0 @@
-2022-04-16 01:41:33,278 INFO    MainThread:6469 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
-2022-04-16 01:41:33,278 INFO    MainThread:6469 [wandb_setup.py:_flush():69] setting login settings: {}
-2022-04-16 01:41:33,279 INFO    MainThread:6469 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/logs/debug.log
-2022-04-16 01:41:33,279 INFO    MainThread:6469 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/logs/debug-internal.log
-2022-04-16 01:41:33,279 INFO    MainThread:6469 [wandb_init.py:init():369] calling init triggers
-2022-04-16 01:41:33,280 INFO    MainThread:6469 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
-config: {'workers': 4, 'epochs': 10, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-16 01:41:33,280 INFO    MainThread:6469 [wandb_init.py:init():418] starting backend
-2022-04-16 01:41:33,301 INFO    MainThread:6469 [backend.py:ensure_launched():132] starting backend process...
-2022-04-16 01:41:33,302 INFO    MainThread:6469 [backend.py:ensure_launched():137] started backend process with pid: 0
-2022-04-16 01:41:33,304 INFO    wandb_internal:6469 [internal.py:wandb_internal():91] W&B internal server running at pid: 6469, started at: 2022-04-16 01:41:33.301961
-2022-04-16 01:41:33,305 INFO    MainThread:6469 [wandb_init.py:init():423] backend started and connected
-2022-04-16 01:41:33,307 INFO    MainThread:6469 [wandb_init.py:init():465] updated telemetry
-2022-04-16 01:41:33,309 INFO    MainThread:6469 [wandb_init.py:init():484] communicating current version
-2022-04-16 01:41:33,312 INFO    WriterThread:6469 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/run-qw6te5do.wandb
-2022-04-16 01:41:33,652 INFO    MainThread:6469 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-16 01:41:33,653 INFO    MainThread:6469 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-16 01:41:34,451 INFO    MainThread:6469 [wandb_init.py:init():522] starting run threads in backend
-2022-04-16 01:41:34,451 INFO    SenderThread:6469 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files
-2022-04-16 01:41:34,451 INFO    SenderThread:6469 [sender.py:_start_run_threads():707] run started: qw6te5do with start time 1650053493
-2022-04-16 01:41:34,452 INFO    SenderThread:6469 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:41:35,452 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/wandb-summary.json
-2022-04-16 01:41:35,452 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/conda-environment.yaml
-2022-04-16 01:41:35,452 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/code/train_translation.py
-2022-04-16 01:41:35,453 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/requirements.txt
-2022-04-16 01:41:35,453 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/diff.patch
-2022-04-16 01:41:35,453 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/code
-2022-04-16 01:41:36,141 INFO    SenderThread:6469 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-16 01:41:36,142 INFO    SenderThread:6469 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-16 01:41:36,142 INFO    SenderThread:6469 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-16 01:41:36,149 INFO    MainThread:6469 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-16 01:41:36,150 INFO    MainThread:6469 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-16 01:41:36,151 INFO    MainThread:6469 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-16 01:41:36,152 INFO    MainThread:6469 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-16 01:41:36,152 INFO    MainThread:6469 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-16 01:41:36,152 INFO    MainThread:6469 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 10, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-16 01:41:36,451 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/conda-environment.yaml
-2022-04-16 01:41:36,451 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/wandb-metadata.json
-2022-04-16 01:41:36,452 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:41:37,884 INFO    Thread-14 :6469 [upload_job.py:push():133] Uploaded file /tmp/tmp7a2m2v__wandb/2wnhls28-wandb-metadata.json
-2022-04-16 01:41:38,099 INFO    Thread-15 :6469 [upload_job.py:push():133] Uploaded file /tmp/tmp7a2m2v__wandb/2wpqbnqv-code/train_translation.py
-2022-04-16 01:41:38,452 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:41:38,856 INFO    Thread-22 :6469 [upload_job.py:push():133] Uploaded file /tmp/tmp7a2m2v__wandb/2jnxx1qb-diff.patch
-2022-04-16 01:41:39,452 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/config.yaml
-2022-04-16 01:41:40,452 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:41:48,279 INFO    SenderThread:6469 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:41:48,461 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/wandb-summary.json
-2022-04-16 01:41:48,461 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:41:50,462 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:42:02,488 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:42:06,489 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:42:08,489 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:42:10,490 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:42:12,491 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:42:14,492 INFO    Thread-11 :6469 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/files/output.log
-2022-04-16 01:42:15,200 INFO    MainThread:6469 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
-2022-04-16 01:42:15,200 INFO    MainThread:6469 [wandb_run.py:_restore():1480] restore
-2022-04-16 01:42:15,200 INFO    SenderThread:6469 [sender.py:finish():933] shutting down sender
-2022-04-16 01:42:15,200 INFO    SenderThread:6469 [dir_watcher.py:finish():282] shutting down directory watcher
-2022-04-16 01:42:15,200 INFO    WriterThread:6469 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014133-qw6te5do/run-qw6te5do.wandb
diff --git a/wandb/run-20220416_014133-qw6te5do/run-qw6te5do.wandb b/wandb/run-20220416_014133-qw6te5do/run-qw6te5do.wandb
deleted file mode 100644
index ff87007..0000000
Binary files a/wandb/run-20220416_014133-qw6te5do/run-qw6te5do.wandb and /dev/null differ
diff --git a/wandb/run-20220416_014323-1a0lobwa/files/code/train_translation.py b/wandb/run-20220416_014323-1a0lobwa/files/code/train_translation.py
deleted file mode 100644
index a5d5e46..0000000
--- a/wandb/run-20220416_014323-1a0lobwa/files/code/train_translation.py
+++ /dev/null
@@ -1,405 +0,0 @@
-import numpy as np
-from pathlib import Path
-import argparse
-import json
-import math
-import os
-import random
-import signal
-import subprocess
-import sys
-import time
-
-import torch
-from torch import nn, optim 
-from torch.nn import Transformer 
-import torchtext
-import t_dataset
-from t_dataset import  Translation_dataset_t
-from t_dataset import  MyCollate
-import translation_utils 
-from translation_utils import TokenEmbedding, PositionalEncoding 
-from translation_utils import create_mask
-from transformers import BertModel 
-from transformers import AutoTokenizer
-from torch import Tensor
-from torchtext.data.metrics import bleu_score
-from models import Translator
-from models import BarlowTwins
-
-import wandb 
-
-
-#import barlow
-os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
-os.environ['WANDB_START_METHOD'] = 'thread'
-os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-
-MANUAL_SEED = 4444
-
-random.seed(MANUAL_SEED)
-np.random.seed(MANUAL_SEED)
-torch.manual_seed(MANUAL_SEED)
-torch.backends.cudnn.deterministic = True
-
-
-parser = argparse.ArgumentParser(description = 'Translation') 
-
-# Training hyper-parameters: 
-parser.add_argument('--workers', default=4, type=int, metavar='N', 
-                    help='number of data loader workers') 
-parser.add_argument('--epochs', default=10, type=int, metavar='N',
-                    help='number of total epochs to run')
-parser.add_argument('--batch_size', default=16, type=int, metavar='n',
-                    help='mini-batch size')
-parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
-                    help='base learning rate')
-parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
-                    help='dropout for training translation transformer')
-parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
-                    help='weight decay')
-parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
-                    help='momentum for sgd')
-parser.add_argument('--clip', default=1, type=float, metavar='GC',
-                    help='Gradient Clipping')
-parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
-                    help='betas for Adam Optimizer')
-parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
-                    help='eps for Adam optimizer')
-parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
-                    help='loss function for translation')
-parser.add_argument('--optimizer', default='adam', type=str, metavar='OP',
-                    help='selecting optimizer')
-
-# Transformer parameters: 
-parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
-                    help='dimension of transformer encoder')
-parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                    help= 'number of heads in transformer') 
-parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                    help= 'dimension of feedforward layer in transformer encoder') 
-parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                   help='number of layers of transformer encoder') 
-parser.add_argument('--projector', default='768-256', type=str,
-                    metavar='MLP', help='projector MLP')
-
-# Tokenizer: 
-parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
-                metavar='T', help= 'tokenizer')
-parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
-                    help='Dimension of mbert output')
-# Paths: 
-parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
-                    metavar='DIR', help='path to checkpoint directory')
-
-# to load or barlow or not: 
-parser.add_argument('--load', default=0, type=int,
-                    metavar='DIR', help='to load barlow twins encoder or not')
-
-# calculate bleu: 
-parser.add_argument('--checkbleu', default=5 , type=int,
-                    metavar='BL', help='check bleu after these number of epochs')
-# train or test dataset
-parser.add_argument('--train', default=True , type=bool,
-                    metavar='T', help='selecting train set')
-
-parser.add_argument('--print_freq', default=5 , type=int,
-                    metavar='PF', help='frequency of printing and saving stats')
-
-parser.add_argument('--test_translation', default=0, type=int, 
-                    metavar='TT', help='testing translation_score')
-''' NOTE: 
-        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-'''
-
-args = parser.parse_args()
-# print(args.load)
-os.environ["TOKENIZERS_PARALLELISM"] = "true"
-
-def main(): 
-
-    # print("entered main")
-    args.ngpus_per_node = torch.cuda.device_count()
-    if 'SLURM_JOB_ID' in os.environ:
-        # single-node and multi-node distributed training on SLURM cluster
-        # requeue job on SLURM preemption
-        signal.signal(signal.SIGUSR1, handle_sigusr1)
-        signal.signal(signal.SIGTERM, handle_sigterm)
-        # find a common host name on all nodes
-        # assume scontrol returns hosts in the same order on all nodes
-        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
-        stdout = subprocess.check_output(cmd.split())
-        host_name = stdout.decode().splitlines()[0]
-        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
-        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
-        args.dist_url = f'tcp://{host_name}:58472'
-    else:
-        # single-node distributed training
-        args.rank = 0
-        args.dist_url = 'tcp://localhost:58472'
-        args.world_size = args.ngpus_per_node
-    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
-
-
-def main_worker(gpu, args):
-    
-    args.rank += gpu
-    torch.distributed.init_process_group(
-        backend='nccl', init_method=args.dist_url,
-        world_size=args.world_size, rank=args.rank)
-
-    if args.rank == 0:
-
-        wandb.init(config=args, project='translation_test')#############################################
-        wandb.config.update(args)
-        config = wandb.config
-    
-        # exit()
-        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
-        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
-        print(' '.join(sys.argv))
-        print(' '.join(sys.argv), file=stats_file)
-
-    torch.cuda.set_device(gpu)
-    torch.backends.cudnn.benchmark = True
-
-    dataset = Translation_dataset_t(train=args.train) 
-    src_vocab_size = dataset.de_vocab_size
-    trg_vocab_size = dataset.en_vocab_size
-    tokenizer = dataset.tokenizer  
-    pad_idx = tokenizer.pad_token_id
-    sos_idx = tokenizer.cls_token_id 
-    eos_idx = tokenizer.sep_token_id
-
-#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
-    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
-    # print(src_vocab_size, trg_vocab_size)
-    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
-    transformer = Transformer(d_model=args.dmodel, 
-                              nhead=args.nhead, 
-                              num_encoder_layers=args.nlayers, 
-                              num_decoder_layers = args.nlayers, 
-                              dim_feedforward=args.dfeedforward, 
-                              dropout=args.dropout)
-    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
-    # print(model.state_dict)
-#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
-
-    # args.load = False
-
-    if args.load == 1: 
-        # print(args.load)
-        # print('inside')
-        print('loading barlow model')
-        t_enc = model.transformer.encoder
-        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
-        ### note: lambd is just a placeholder
-        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
-                            map_location='cpu')
-        barlow.load_state_dict(ckpt['model'])
-        model.transformer.encoder = barlow.transformer_enc        
-        model.mbert = barlow.mbert
-    '''
-    to_do: 
-    if post_train: 
-        torch.load(model.states_dict)
-        model.transformer.encoder = model_barlow
-
-    '''
-#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
-
-    param_weights = []
-    param_biases = []
-    for param in model.parameters():
-        if param.ndim == 1:
-            param_biases.append(param)
-        else:
-            param_weights.append(param)
-    parameters = [{'params': param_weights}, {'params': param_biases}]
-    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
-
-###########################################################
-    if args.optimizer == 'adam':
-        optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
-    else: 
-        optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) 
-    
-    if args.loss_fn == 'cross_entropy': 
-        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
-##############################################################
-
-    start_epoch = 0 
-
-    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
-
-    assert args.batch_size % args.world_size == 0
-    per_device_batch_size = args.batch_size // args.world_size
-    id2bert_dict = dataset.id2bert_dict
-    ###############################
-    loader = torch.utils.data.DataLoader(
-         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-   
-    test_loader = torch.utils.data.DataLoader(
-         dataset, batch_size=1, num_workers=args.workers,
-         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
-    #############################
-    start_time = time.time()
-
-
-    if not args.test_translation: 
-
-        for epoch in range(start_epoch, args.epochs):
-            sampler.set_epoch(epoch)
-            epoch_loss = 0 
-            t = 0 
-            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
-                src = sent[0].cuda(gpu, non_blocking=True)
-                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
-                tgt_out = sent[3].cuda(gpu, non_blocking=True)
-                
-                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
-                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
-                
-                optimizer.zero_grad()
-
-                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
-                loss.backward()
-
-                optimizer.step()
-                # losses += loss.item()
-                
-#                wandb.log({'iter_loss': loss})
-                epoch_loss += loss.item()
-                t += 1 
-                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-                
-                if step % args.print_freq == 0:
-                    if args.rank == 0:
-                        stats = dict(epoch=epoch, step=step,
-                                    loss=loss.item(),
-                                    time=int(time.time() - start_time))
-                        print(json.dumps(stats))
-                        print(json.dumps(stats), file=stats_file)
-            if args.rank == 0:
-
-                wandb.log({"epoch_loss":epoch_loss/t})
-                # save checkpoint
-                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
-                            optimizer=optimizer.state_dict())
-                # print(model.state_dict)
-                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
-                print('translation model saved in', args.checkpoint_dir)
-            
-    ##############################################################
-            if args.rank == 0: 
-                if epoch%args.checkbleu ==0 : 
-
-                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                    wandb.log({'bleu_score': bleu_score}) 
-    #            print(bleu_score(predicted, target))
-    ##############################################################
-    #        if epoch%1 ==0 : 
-    #            torch.save(model.module.state_dict(),
-    #                   'path.pth')
-    #            print("Model is saved")
-            # if args.rank == 0:
-            #     # save checkpoint
-            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
-            #                  optimizer=optimizer.state_dict())
-            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
-            #     print('saved translation model in', args.checkpoint_dir)
-        wandb.finish()
-            
-    else: 
-
-        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-        print('test_bleu_score', bleu_score)
-        if args.rank == 0: 
-            wandb.log({'bleu_score': bleu_score})
-
-
-def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
-
-    model.eval()
-    predicted=[]
-    target=[]
-            
-    for i in test_loader: 
-        src = i[0].cuda(gpu, non_blocking=True)
-#        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-        tgt_out = i[3].cuda(gpu, non_blocking=True)
-        num_tokens = src.shape[0]
-
-        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
-        out = translate(model, src, tokenizer, src_mask, id2bert_dict, gpu)
-        predicted.append(out)
-        for i in range(len(tgt_out)): 
-            tgt_out[i] = id2bert_dict[tgt_out[i].item()]
-        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-        print('out', out)
-        print('predicted', tokenizer.convert_ids_to_tokens(tgt_out))
-
-                
-        try: 
-            bleu_score(predicted, target)
-        except: 
-            predicted.pop()
-            target.pop()
-        
-            
-        bleu = bleu_score(predicted, target)
-
-    return bleu
-
-'''
-todo: 
-    BLEU score
-'''
-
-# function to generate output sequence using greedy algorithm 
-def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
-    src = src
-    src_mask = src_mask
-
-    memory = model.module.encode(src, src_mask)
-    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
-    for i in range(max_len-1):
-        memory = memory
-        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
-                    .type(torch.bool)).cuda(gpu, non_blocking=True)
-        out = model.module.decode(ys, memory, tgt_mask)
-        out = out.transpose(0, 1)
-        prob = model.module.generator(out[:, -1])
-        _, next_word = torch.max(prob, dim=1)
-        next_word = next_word.item()
-
-        ys = torch.cat([ys,
-                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
-        if next_word == eos_idx:
-            break
-    return ys
-
-
-# actual function to translate input sentence into target language
-def translate(model: torch.nn.Module, 
-        src: torch.tensor, 
-        tokenizer,src_mask, id2bert_dict, gpu):
-    model.eval()
-    
-    num_tokens = src.shape[0]
-    
-    
-    tgt_tokens = greedy_decode(
-        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-    
-    for i in range(len(tgt_tokens)): 
-        tgt_tokens[i] = id2bert_dict[tgt_tokens[i].item()]
-#    print(tgt_tokens)
-
-    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
-
-
-if __name__ == '__main__': 
-    main()
-    wandb.finish()
diff --git a/wandb/run-20220416_014323-1a0lobwa/files/conda-environment.yaml b/wandb/run-20220416_014323-1a0lobwa/files/conda-environment.yaml
deleted file mode 100644
index fd74d2b..0000000
--- a/wandb/run-20220416_014323-1a0lobwa/files/conda-environment.yaml
+++ /dev/null
@@ -1,158 +0,0 @@
-name: ectc
-channels:
-  - pytorch
-  - defaults
-dependencies:
-  - _libgcc_mutex=0.1=main
-  - _openmp_mutex=4.5=1_gnu
-  - blas=1.0=mkl
-  - brotlipy=0.7.0=py37h27cfd23_1003
-  - bzip2=1.0.8=h7b6447c_0
-  - ca-certificates=2022.3.18=h06a4308_0
-  - certifi=2021.10.8=py37h06a4308_2
-  - cffi=1.15.0=py37hd667e15_1
-  - cryptography=36.0.0=py37h9ce1e76_0
-  - cudatoolkit=11.3.1=h2bc3f7f_2
-  - ffmpeg=4.3=hf484d3e_0
-  - freetype=2.11.0=h70c0345_0
-  - giflib=5.2.1=h7b6447c_0
-  - gmp=6.2.1=h2531618_2
-  - gnutls=3.6.15=he1e5248_0
-  - idna=3.3=pyhd3eb1b0_0
-  - intel-openmp=2021.4.0=h06a4308_3561
-  - jpeg=9d=h7f8727e_0
-  - lame=3.100=h7b6447c_0
-  - lcms2=2.12=h3be6417_0
-  - ld_impl_linux-64=2.35.1=h7274673_9
-  - libffi=3.3=he6710b0_2
-  - libgcc-ng=9.3.0=h5101ec6_17
-  - libgomp=9.3.0=h5101ec6_17
-  - libiconv=1.15=h63c8f33_5
-  - libidn2=2.3.2=h7f8727e_0
-  - libpng=1.6.37=hbc83047_0
-  - libstdcxx-ng=9.3.0=hd4cf53a_17
-  - libtasn1=4.16.0=h27cfd23_0
-  - libtiff=4.2.0=h85742a9_0
-  - libunistring=0.9.10=h27cfd23_0
-  - libuv=1.40.0=h7b6447c_0
-  - libwebp=1.2.2=h55f646e_0
-  - libwebp-base=1.2.2=h7f8727e_0
-  - lz4-c=1.9.3=h295c915_1
-  - mkl=2021.4.0=h06a4308_640
-  - mkl-service=2.4.0=py37h7f8727e_0
-  - mkl_fft=1.3.1=py37hd3c417c_0
-  - mkl_random=1.2.2=py37h51133e4_0
-  - ncurses=6.3=h7f8727e_2
-  - nettle=3.7.3=hbbd107a_1
-  - numpy-base=1.21.2=py37h79a1101_0
-  - openh264=2.1.1=h4ff587b_0
-  - openssl=1.1.1n=h7f8727e_0
-  - pip=21.2.2=py37h06a4308_0
-  - pycparser=2.21=pyhd3eb1b0_0
-  - pyopenssl=22.0.0=pyhd3eb1b0_0
-  - pysocks=1.7.1=py37_1
-  - python=3.7.11=h12debd9_0
-  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
-  - pytorch-mutex=1.0=cuda
-  - readline=8.1.2=h7f8727e_1
-  - requests=2.27.1=pyhd3eb1b0_0
-  - setuptools=58.0.4=py37h06a4308_0
-  - six=1.16.0=pyhd3eb1b0_1
-  - sqlite=3.38.0=hc218d9a_0
-  - tk=8.6.11=h1ccaba5_0
-  - torchaudio=0.11.0=py37_cu113
-  - typing_extensions=4.1.1=pyh06a4308_0
-  - wheel=0.37.1=pyhd3eb1b0_0
-  - xz=5.2.5=h7b6447c_0
-  - zlib=1.2.11=h7f8727e_4
-  - zstd=1.4.9=haebb681_0
-  - pip:
-    - aiohttp==3.8.1
-    - aiosignal==1.2.0
-    - antlr4-python3-runtime==4.8
-    - async-timeout==4.0.2
-    - asynctest==0.13.0
-    - attrs==21.4.0
-    - backcall==0.2.0
-    - bitarray==2.4.1
-    - blessings==1.7
-    - charset-normalizer==2.0.12
-    - click==8.0.4
-    - colorama==0.4.4
-    - configparser==5.2.0
-    - cython==0.29.28
-    - datasets==1.16.1
-    - debugpy==1.6.0
-    - decorator==5.1.1
-    - dill==0.3.4
-    - docker-pycreds==0.4.0
-    - entrypoints==0.4
-    - fastbpe==0.1.0
-    - filelock==3.6.0
-    - frozenlist==1.3.0
-    - fsspec==2022.2.0
-    - gitdb==4.0.9
-    - gitpython==3.1.27
-    - gpustat==0.6.0
-    - huggingface-hub==0.4.0
-    - hydra-core==1.0.7
-    - importlib-metadata==4.11.3
-    - importlib-resources==5.6.0
-    - ipykernel==6.12.1
-    - ipython==7.32.0
-    - jedi==0.18.1
-    - joblib==1.1.0
-    - jupyter-client==7.2.2
-    - jupyter-core==4.9.2
-    - matplotlib-inline==0.1.3
-    - mock==4.0.3
-    - multidict==6.0.2
-    - multiprocess==0.70.12.2
-    - nest-asyncio==1.5.5
-    - numpy==1.21.5
-    - nvidia-ml-py3==7.352.0
-    - omegaconf==2.0.6
-    - packaging==21.3
-    - pandas==1.3.5
-    - parso==0.8.3
-    - pathtools==0.1.2
-    - pexpect==4.8.0
-    - pickleshare==0.7.5
-    - pillow==9.0.1
-    - portalocker==2.4.0
-    - promise==2.3
-    - prompt-toolkit==3.0.29
-    - protobuf==3.19.4
-    - psutil==5.9.0
-    - ptyprocess==0.7.0
-    - pyarrow==7.0.0
-    - pygments==2.11.2
-    - pyparsing==3.0.7
-    - python-dateutil==2.8.2
-    - pytz==2022.1
-    - pyyaml==6.0
-    - pyzmq==22.3.0
-    - regex==2022.3.15
-    - sacrebleu==2.0.0
-    - sacremoses==0.0.49
-    - sentry-sdk==1.5.8
-    - shortuuid==1.0.8
-    - smmap==5.0.0
-    - subprocess32==3.5.4
-    - subword-nmt==0.3.8
-    - tabulate==0.8.9
-    - tokenizers==0.10.3
-    - torch==1.11.0
-    - torchtext==0.12.0
-    - torchvision==0.9.1
-    - tornado==6.1
-    - tqdm==4.63.1
-    - traitlets==5.1.1
-    - transformers==4.14.1
-    - urllib3==1.26.9
-    - wandb==0.10.31
-    - wcwidth==0.2.5
-    - xxhash==3.0.0
-    - yarl==1.7.2
-    - zipp==3.7.0
-prefix: /home/ivlabs/miniconda3/envs/ectc
diff --git a/wandb/run-20220416_014323-1a0lobwa/files/config.yaml b/wandb/run-20220416_014323-1a0lobwa/files/config.yaml
deleted file mode 100644
index 52b4100..0000000
--- a/wandb/run-20220416_014323-1a0lobwa/files/config.yaml
+++ /dev/null
@@ -1,110 +0,0 @@
-wandb_version: 1
-
-_wandb:
-  desc: null
-  value:
-    cli_version: 0.10.31
-    code_path: code/train_translation.py
-    framework: huggingface
-    huggingface_version: 4.14.1
-    is_jupyter_run: false
-    is_kaggle_kernel: false
-    python_version: 3.7.11
-    t:
-      1:
-      - 1
-      - 11
-      4: 3.7.11
-      5: 0.10.31
-      6: 4.14.1
-      8:
-      - 8
-batch_size:
-  desc: null
-  value: 16
-betas:
-  desc: null
-  value:
-  - 0.9
-  - 0.98
-checkbleu:
-  desc: null
-  value: 5
-checkpoint_dir:
-  desc: null
-  value: checkpoint
-clip:
-  desc: null
-  value: 1
-dfeedforward:
-  desc: null
-  value: 200
-dist_url:
-  desc: null
-  value: tcp://localhost:58472
-dmodel:
-  desc: null
-  value: 768
-dropout:
-  desc: null
-  value: 0.01
-epochs:
-  desc: null
-  value: 10
-eps:
-  desc: null
-  value: 1.0e-09
-learning_rate:
-  desc: null
-  value: 0.2
-load:
-  desc: null
-  value: 0
-loss_fn:
-  desc: null
-  value: cross_entropy
-mbert_out_size:
-  desc: null
-  value: 768
-momentum:
-  desc: null
-  value: 0.9
-ngpus_per_node:
-  desc: null
-  value: 2
-nhead:
-  desc: null
-  value: 4
-nlayers:
-  desc: null
-  value: 3
-optimizer:
-  desc: null
-  value: adam
-print_freq:
-  desc: null
-  value: 5
-projector:
-  desc: null
-  value: 768-256
-rank:
-  desc: null
-  value: 0
-test_translation:
-  desc: null
-  value: 0
-tokenizer:
-  desc: null
-  value: bert-base-multilingual-uncased
-train:
-  desc: null
-  value: true
-weight_decay:
-  desc: null
-  value: 1.0e-06
-workers:
-  desc: null
-  value: 4
-world_size:
-  desc: null
-  value: 2
diff --git a/wandb/run-20220416_014323-1a0lobwa/files/diff.patch b/wandb/run-20220416_014323-1a0lobwa/files/diff.patch
deleted file mode 100644
index 5f2c089..0000000
--- a/wandb/run-20220416_014323-1a0lobwa/files/diff.patch
+++ /dev/null
@@ -1,30817 +0,0 @@
-diff --git a/__pycache__/barlow_utils.cpython-37.pyc b/__pycache__/barlow_utils.cpython-37.pyc
-index 3c0d4fe..b13b62f 100644
-Binary files a/__pycache__/barlow_utils.cpython-37.pyc and b/__pycache__/barlow_utils.cpython-37.pyc differ
-diff --git a/__pycache__/models.cpython-37.pyc b/__pycache__/models.cpython-37.pyc
-index 3bbb9de..acc1737 100644
-Binary files a/__pycache__/models.cpython-37.pyc and b/__pycache__/models.cpython-37.pyc differ
-diff --git a/__pycache__/t_dataset.cpython-37.pyc b/__pycache__/t_dataset.cpython-37.pyc
-index 2650733..c4b566b 100644
-Binary files a/__pycache__/t_dataset.cpython-37.pyc and b/__pycache__/t_dataset.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-37.pyc b/__pycache__/translation_utils.cpython-37.pyc
-index 60c9eda..12c22a5 100644
-Binary files a/__pycache__/translation_utils.cpython-37.pyc and b/__pycache__/translation_utils.cpython-37.pyc differ
-diff --git a/__pycache__/translation_utils.cpython-38.pyc b/__pycache__/translation_utils.cpython-38.pyc
-index 061d0e7..a1e7877 100644
-Binary files a/__pycache__/translation_utils.cpython-38.pyc and b/__pycache__/translation_utils.cpython-38.pyc differ
-diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
-index 884dd9c..83f30a6 100644
---- a/checkpoint/stats.txt
-+++ b/checkpoint/stats.txt
-@@ -833,3 +833,202 @@ train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 -
- {"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
- {"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
- {"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 4}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 5}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 5}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 6}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 7}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 7}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 8}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 8}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 9}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 8}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 65}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 178}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 15}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 72}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 128}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 183}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 239}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 295}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 351}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 407}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 463}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 19}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 104}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 188}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 355}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 606}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 690}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.121065616607666, "time": 9}
-+{"epoch": 0, "step": 5, "loss": 97.44178771972656, "time": 10}
-+{"epoch": 0, "step": 10, "loss": 168.33328247070312, "time": 12}
-+{"epoch": 0, "step": 15, "loss": 133.17933654785156, "time": 12}
-+{"epoch": 0, "step": 20, "loss": 112.3768539428711, "time": 13}
-+{"epoch": 0, "step": 25, "loss": 120.29653930664062, "time": 14}
-+{"epoch": 0, "step": 30, "loss": 119.97941589355469, "time": 15}
-+{"epoch": 0, "step": 35, "loss": 86.40515899658203, "time": 16}
-+{"epoch": 0, "step": 40, "loss": 70.5906982421875, "time": 17}
-+train_translation.py
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 28}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 155}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 281}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 405}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 530}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 657}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 783}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 908}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 1033}
-+train_translation.py
-+train_translation.py
-+train_translation.py
-+train_translation.py --load=1
-+train_translation.py --load=1
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 9}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 65}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 178}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 9}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 37}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 66}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 94}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 122}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 150}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 179}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 207}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 235}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 16}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 72}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 128}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 184}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 240}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 296}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 352}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 408}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 464}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 20}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 273}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 356}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 440}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 524}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 608}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 692}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 20}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 356}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 607}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 691}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 20}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 188}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 272}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 356}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 439}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 523}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 607}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 690}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 21}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 105}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 273}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 357}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 440}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 524}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 608}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 691}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.079373359680176, "time": 21}
-+{"epoch": 0, "step": 5, "loss": 100.0801773071289, "time": 106}
-+{"epoch": 0, "step": 10, "loss": 157.312744140625, "time": 189}
-+{"epoch": 0, "step": 15, "loss": 78.03355407714844, "time": 273}
-+{"epoch": 0, "step": 20, "loss": 85.30223083496094, "time": 357}
-+{"epoch": 0, "step": 25, "loss": 79.75176239013672, "time": 441}
-+{"epoch": 0, "step": 30, "loss": 123.69627380371094, "time": 524}
-+{"epoch": 0, "step": 35, "loss": 70.34227752685547, "time": 608}
-+{"epoch": 0, "step": 40, "loss": 108.36054229736328, "time": 691}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.128603458404541, "time": 19}
-+{"epoch": 0, "step": 5, "loss": 156.04449462890625, "time": 104}
-+{"epoch": 0, "step": 10, "loss": 154.7353515625, "time": 188}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.128603458404541, "time": 5}
-+{"epoch": 0, "step": 5, "loss": 156.04449462890625, "time": 6}
-+{"epoch": 0, "step": 10, "loss": 154.7353515625, "time": 7}
-+{"epoch": 1, "step": 15, "loss": 138.67442321777344, "time": 70}
-+{"epoch": 1, "step": 20, "loss": 75.6456298828125, "time": 70}
-+{"epoch": 2, "step": 25, "loss": 64.19247436523438, "time": 116}
-+{"epoch": 2, "step": 30, "loss": 65.62056732177734, "time": 116}
-+{"epoch": 2, "step": 35, "loss": 66.36638641357422, "time": 117}
-+{"epoch": 3, "step": 40, "loss": 77.29269409179688, "time": 164}
-+{"epoch": 3, "step": 45, "loss": 68.74011993408203, "time": 165}
-+{"epoch": 4, "step": 50, "loss": 74.82659912109375, "time": 182}
-+{"epoch": 4, "step": 55, "loss": 77.39452362060547, "time": 183}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.128603458404541, "time": 5}
-+{"epoch": 0, "step": 5, "loss": 156.04449462890625, "time": 6}
-+{"epoch": 0, "step": 10, "loss": 154.7353515625, "time": 7}
-+{"epoch": 1, "step": 15, "loss": 138.67442321777344, "time": 73}
-+{"epoch": 1, "step": 20, "loss": 75.6456298828125, "time": 74}
-+{"epoch": 2, "step": 25, "loss": 64.19247436523438, "time": 92}
-+{"epoch": 2, "step": 30, "loss": 65.62056732177734, "time": 93}
-+{"epoch": 2, "step": 35, "loss": 66.36638641357422, "time": 93}
-+{"epoch": 3, "step": 40, "loss": 77.29269409179688, "time": 110}
-+{"epoch": 3, "step": 45, "loss": 68.74011993408203, "time": 111}
-+{"epoch": 4, "step": 50, "loss": 74.82659912109375, "time": 131}
-+{"epoch": 4, "step": 55, "loss": 77.39452362060547, "time": 132}
-+{"epoch": 5, "step": 60, "loss": 62.27414321899414, "time": 149}
-+{"epoch": 5, "step": 65, "loss": 90.9207992553711, "time": 150}
-+{"epoch": 5, "step": 70, "loss": 66.96754455566406, "time": 150}
-+{"epoch": 6, "step": 75, "loss": 71.40245819091797, "time": 216}
-+{"epoch": 6, "step": 80, "loss": 63.940818786621094, "time": 217}
-+{"epoch": 7, "step": 85, "loss": 50.857147216796875, "time": 233}
-+{"epoch": 7, "step": 90, "loss": 78.37335205078125, "time": 234}
-+{"epoch": 7, "step": 95, "loss": 100.13611602783203, "time": 234}
-+{"epoch": 8, "step": 100, "loss": 80.35195922851562, "time": 252}
-+{"epoch": 8, "step": 105, "loss": 86.00081634521484, "time": 253}
-+{"epoch": 9, "step": 110, "loss": 82.35330200195312, "time": 272}
-+{"epoch": 9, "step": 115, "loss": 88.81517791748047, "time": 273}
-+train_translation.py --load=0
-+{"epoch": 0, "step": 0, "loss": 7.128603458404541, "time": 5}
-+{"epoch": 0, "step": 5, "loss": 156.04449462890625, "time": 5}
-+{"epoch": 0, "step": 10, "loss": 154.7353515625, "time": 6}
-diff --git a/t_dataset.py b/t_dataset.py
-index c7ab181..53d5caa 100644
---- a/t_dataset.py
-+++ b/t_dataset.py
-@@ -20,19 +20,19 @@ class Translation_dataset_t(Dataset):
-             split = "train" 
-         else: 
-             split = "test" 
--        self.dataset = load_dataset('wmt14', "de-en", split=split) 
-+        self.dataset = load_dataset('opus_rf', "de-en", split=split) 
-         self.de_list = []
-         self.en_list = []
- #        self.tokenizer = tokenizer
-         self.tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-uncased')
--        dataset = load_dataset('opus_rf', 'de-en', split='train')
-         en_list_2 = []
--        for n, i in enumerate(dataset): 
-+        for n, i in enumerate(self.dataset): 
-             en_list_2.append(i['translation']['en'].lower())
- 
-         a1 = list(self.tokenizer(en_list_2, padding=True, return_tensors='pt')['input_ids'])
-         self.en_vocab, self.en_vocab_size = vocab(a1)
-         self.bert2id_dict = translation_utils.bert2id(self.en_vocab)
-+        self.id2bert_dict = translation_utils.id2bert(self.en_vocab)
-         
-         for i in self.dataset: 
-             self.de_list.append(self.tokenizer(i['translation']['de'].lower(), 
-diff --git a/train_translation.py b/train_translation.py
-index eea074a..a5d5e46 100644
---- a/train_translation.py
-+++ b/train_translation.py
-@@ -33,6 +33,7 @@ import wandb
- #import barlow
- os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
- os.environ['WANDB_START_METHOD'] = 'thread'
-+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
- 
- MANUAL_SEED = 4444
- 
-@@ -47,9 +48,9 @@ parser = argparse.ArgumentParser(description = 'Translation')
- # Training hyper-parameters: 
- parser.add_argument('--workers', default=4, type=int, metavar='N', 
-                     help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
-+parser.add_argument('--epochs', default=10, type=int, metavar='N',
-                     help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
-+parser.add_argument('--batch_size', default=16, type=int, metavar='n',
-                     help='mini-batch size')
- parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
-                     help='base learning rate')
-@@ -75,9 +76,9 @@ parser.add_argument('--dmodel', default=768, type=int, metavar='T',
-                     help='dimension of transformer encoder')
- parser.add_argument('--nhead', default=4, type= int, metavar='N', 
-                     help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=500, type=int, metavar='F', 
-+parser.add_argument('--dfeedforward', default=200, type=int, metavar='F', 
-                     help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=8, type=int, metavar= 'N', 
-+parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
-                    help='number of layers of transformer encoder') 
- parser.add_argument('--projector', default='768-256', type=str,
-                     metavar='MLP', help='projector MLP')
-@@ -233,6 +234,7 @@ def main_worker(gpu, args):
- 
-     assert args.batch_size % args.world_size == 0
-     per_device_batch_size = args.batch_size // args.world_size
-+    id2bert_dict = dataset.id2bert_dict
-     ###############################
-     loader = torch.utils.data.DataLoader(
-          dataset, batch_size=per_device_batch_size, num_workers=args.workers,
-@@ -267,7 +269,7 @@ def main_worker(gpu, args):
-                 optimizer.step()
-                 # losses += loss.item()
-                 
--                # wandb.log({'iter_loss': loss})
-+#                wandb.log({'iter_loss': loss})
-                 epoch_loss += loss.item()
-                 t += 1 
-                 torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-@@ -293,7 +295,7 @@ def main_worker(gpu, args):
-             if args.rank == 0: 
-                 if epoch%args.checkbleu ==0 : 
- 
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
-+                    bleu_score = checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu)
-                     wandb.log({'bleu_score': bleu_score}) 
-     #            print(bleu_score(predicted, target))
-     ##############################################################
-@@ -311,13 +313,13 @@ def main_worker(gpu, args):
-             
-     else: 
- 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
-+        bleu_score = checkbleu(model,tokenizer, test_loader, id2bert_dict, gpu )
-         print('test_bleu_score', bleu_score)
-         if args.rank == 0: 
-             wandb.log({'bleu_score': bleu_score})
- 
- 
--def checkbleu(model, tokenizer, test_loader, gpu): 
-+def checkbleu(model, tokenizer, test_loader, id2bert_dict, gpu): 
- 
-     model.eval()
-     predicted=[]
-@@ -325,19 +327,26 @@ def checkbleu(model, tokenizer, test_loader, gpu):
-             
-     for i in test_loader: 
-         src = i[0].cuda(gpu, non_blocking=True)
-+#        tgt_out = i[1][1:, : ].cuda(gpu, non_blocking=True)
-         tgt_out = i[3].cuda(gpu, non_blocking=True)
-         num_tokens = src.shape[0]
- 
-         src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
-+        out = translate(model, src, tokenizer, src_mask, id2bert_dict, gpu)
-         predicted.append(out)
-+        for i in range(len(tgt_out)): 
-+            tgt_out[i] = id2bert_dict[tgt_out[i].item()]
-         target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
-+        print('out', out)
-+        print('predicted', tokenizer.convert_ids_to_tokens(tgt_out))
-+
-                 
-         try: 
-             bleu_score(predicted, target)
-         except: 
-             predicted.pop()
-             target.pop()
-+        
-             
-         bleu = bleu_score(predicted, target)
- 
-@@ -375,7 +384,7 @@ def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
- # actual function to translate input sentence into target language
- def translate(model: torch.nn.Module, 
-         src: torch.tensor, 
--        tokenizer,src_mask, gpu):
-+        tokenizer,src_mask, id2bert_dict, gpu):
-     model.eval()
-     
-     num_tokens = src.shape[0]
-@@ -383,6 +392,11 @@ def translate(model: torch.nn.Module,
-     
-     tgt_tokens = greedy_decode(
-         model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
-+    
-+    for i in range(len(tgt_tokens)): 
-+        tgt_tokens[i] = id2bert_dict[tgt_tokens[i].item()]
-+#    print(tgt_tokens)
-+
-     return tokenizer.convert_ids_to_tokens(tgt_tokens) 
- 
- 
-diff --git a/translation_dataset.py b/translation_dataset.py
-index 274c2f3..82270c6 100644
---- a/translation_dataset.py
-+++ b/translation_dataset.py
-@@ -11,7 +11,7 @@ class Translation_dataset(Dataset):
-     
-     def __init__(self):
-       
--        self.dataset = load_dataset('wmt14', "de-en", split="train") 
-+        self.dataset = load_dataset('opus_rf', "de-en", split="train") 
-         self.de_list = []
-         self.en_list = []
- 
-diff --git a/translation_utils.py b/translation_utils.py
-index 6c66f53..4b3b830 100644
---- a/translation_utils.py
-+++ b/translation_utils.py
-@@ -31,6 +31,13 @@ def bert2id(de_list: set):
-     
-     return label_dict
- 
-+def id2bert(de_list: set): 
-+    label_dict = {}
-+    for n, i in enumerate(de_list): 
-+        label_dict[n] = i
-+    
-+    return label_dict
-+
- def generate_square_subsequent_mask(sz):
-     mask = (torch.triu(torch.ones((sz, sz))) == 1).transpose(0, 1)
-     mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
-@@ -81,10 +88,10 @@ class TokenEmbedding(nn.Module):
-         super(TokenEmbedding, self).__init__()
-         # self.embedding = nn.Embedding(vocab_size, emb_size)
-         self.embedding = mbert
--#         for param in self.embedding.parameters():
--#             param.requires_grad = False
--#         for param in self.embedding.pooler.parameters():
--#             param.requires_grad = True
-+        for param in self.embedding.parameters():
-+            param.requires_grad = False
-+        for param in self.embedding.pooler.parameters():
-+            param.requires_grad = True
-         self.emb_size = emb_size
- 
-     def forward(self, tokens: torch.tensor):
-diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
-index 6163657..5c95722 120000
---- a/wandb/debug-internal.log
-+++ b/wandb/debug-internal.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug-internal.log
-\ No newline at end of file
-+run-20220416_014323-1a0lobwa/logs/debug-internal.log
-\ No newline at end of file
-diff --git a/wandb/debug.log b/wandb/debug.log
-index 7d0f5dd..c54d1ec 120000
---- a/wandb/debug.log
-+++ b/wandb/debug.log
-@@ -1 +1 @@
--run-20220409_182749-paufev36/logs/debug.log
-\ No newline at end of file
-+run-20220416_014323-1a0lobwa/logs/debug.log
-\ No newline at end of file
-diff --git a/wandb/latest-run b/wandb/latest-run
-index f11d588..34b339f 120000
---- a/wandb/latest-run
-+++ b/wandb/latest-run
-@@ -1 +1 @@
--run-20220409_182749-paufev36
-\ No newline at end of file
-+run-20220416_014323-1a0lobwa
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py b/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-deleted file mode 100644
-index 9236ace..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
-+++ /dev/null
-@@ -1,350 +0,0 @@
--# Copyright (c) Facebook, Inc. and its affiliates.
--# All rights reserved.
--#
--# This source code is licensed under the license found in the
--# LICENSE file in the root directory of this source tree.
--
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--from translation_dataset import Translation_dataset
--from translation_dataset import MyCollate
--from transformers import BertModel
--from transformers import AutoTokenizer
--from torch import nn, optim
--import torch
--from t_dataset import Translation_dataset_t
--from torch.nn import Transformer
--from models import BarlowTwins
--from models import Translator
--from barlow_utils import off_diagonal 
--import wandb 
--#from _config import Config 
--#config = Config.config
--
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--#setting random seeds
--SEED = 4444
--
--random.seed(SEED)
--np.random.seed(SEED)
--torch.manual_seed(SEED)
--torch.cuda.manual_seed(SEED)
--torch.backends.cudnn.deterministic = True
--
--
--
--
--parser = argparse.ArgumentParser(description='Barlow Twins Training')
--# parser.add_batch_sizeargument('data', type=Path, metavar='DIR',
--#                     help='path to dataset')
--
--
--
--# Training parameters: 
--parser.add_argument('--workers', default=20, type=int, metavar='N',
--                    help='number of data loader workers')
--parser.add_argument('--epochs', default=2, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=64, type=int, metavar='N',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate-weights', default=0.2, type=float, metavar='LR',
--                    help='base learning rate for weights')
--parser.add_argument('--learning-rate-biases', default=0.0048, type=float, metavar='LR',
--                 help='base learning rate for biases and batch norm parameters')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--lambd', default=0.0051, type=float, metavar='L',
--                    help='weight on off-diagonal terms')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--
--# Model parameters:
--parser.add_argument('--projector', default='768-768', type=str,
--                    metavar='MLP', help='projector MLP')
--parser.add_argument('--print-freq', default=100, type=int, metavar='N',
--                    help='print frequency')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=3, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--dropout', default=0.0051, type=float, metavar= 'D', 
--                   help='dropout in transformer') 
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-cased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint-dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--parser.add_argument('--load', default=1, type=int,
--                    metavar='LO', help='load weights from translation model')
--
--args = parser.parse_args()
--
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main():
--
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--        wandb.init(config=args)#############################################
--        # wandb.config.update(args)
--        config = wandb.config
--        # print(args.lambd, config.lambd)
--        # wandb.finish()
--        # exibatch_sizet()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=False)
--    t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    mbert = BertModel.from_pretrained(args.tokenizer)
--    model = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=args.lambd).cuda(gpu)
--    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--    optimizer = LARS(parameters, lr=0, weight_decay=args.weight_decay,
--                     weight_decay_filter=True,
--                     lars_adaptation_filter=True)
--    # optimizer = torch.optim.Adam(model.parameters(),lr=0.001)
--
--    # automatically resume from checkpoint if it exists
--    # if (args.checkpoint_dir / 'checkpoint.pth').is_file():
--    #     ckpt = torch.load(args.checkpoint_dir / 'checkpoint.pth',
--    #                       map_location='cpu')
--    #     start_epoch = ckpt['epoch']
--    #     # print("model=",model)
--    #     # print("ckpt=",ckpt['model'])
--    #     model.load_state_dict(ckpt['model'])
--    #     optimizer.load_state_dict(ckpt['optimizer'])
--    # else:
--
--    trans_dataset = Translation_dataset_t(train=True)
--    src_vocab_size = trans_dataset.de_vocab_size 
--    tgt_vocab_size = trans_dataset.en_vocab_size
--    tokenizer = trans_dataset.tokenizer
--    transformer = Transformer(d_model=args.dmodel, 
--                                   nhead=args.nhead, 
--                                   num_encoder_layers=args.nlayers,
--                                   num_decoder_layers=args.nlayers, 
--                                   dim_feedforward=args.dfeedforward, 
--                                   dropout=args.dropout)
--    print(args.batch_size)
--    translation_model = Translator(mbert, 
--            transformer,
--            tgt_vocab_size=tgt_vocab_size,
--            emb_size=args.mbert_out_size)
--    
--    if args.load == 1 : 
--        print('loading translation model')
--        ckpt = torch.load(args.checkpoint_dir / 'translation_checkpoint.pth') #,map_location='cpu')
--        translation_model.load_state_dict(ckpt['model'])
--        model.transformer_enc = translation_model.transformer.encoder
--        model.mbert = translation_model.tok_emb.embedding
--        
--    start_epoch = 0
--
--
--    ################################
--    # dataset = torchvision.datasets.ImageFolder(args.data / 'train', Transform())
--    # sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--    ###############################
--
--    dataset = Translation_dataset()
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate())
--    #############################
--    start_time = time.time()
--    scaler = torch.cuda.amp.GradScaler()
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            y1 = sent[0].cuda(gpu, non_blocking=True)
--            y2 = sent[1].cuda(gpu, non_blocking=True)
--            adjust_learning_rate(args, optimizer, loader, step)
--            optimizer.zero_grad()
--            with torch.cuda.amp.autocast(): 
--                _, loss = model.forward(y1, y2)
--                wandb.log({'iter_loss':loss})
--#               print(loss.item())
--                epoch_loss += loss.item()
--            scaler.scale(loss).backward()
--            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
--            scaler.step(optimizer)
--            scaler.update()
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 lr_weights=optimizer.param_groups[0]['lr'],
--                                 lr_biases=optimizer.param_groups[1]['lr'],
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.state_dict(),
--                         optimizer=optimizer.state_dict())
--            torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--            print('barlow model saved in', args.checkpoint_dir)
--            for sent in test_loader: 
--                y1 = sent[0].cuda(gpu, non_blocking=True)
--                y2 = sent[1].cuda(gpu, non_blocking=True)
--                model.eval()
--                c, _ = model(y1, y2)
--                xlabels = tokenizer.convert_ids_to_tokens(y2)
--                ylabels = tokenizer.convert_ids_to_tokens(y1)
--    wandb.finish()
--#    if args.rank == 0:
--#        save final model
--#        torch.save(model.module.state_dict(),
--#                    args.checkpoint_dir / 'translation.pth')
--
--
--def adjust_learning_rate(args, optimizer, loader, step):
--    max_steps = args.epochs * len(loader)
--    warmup_steps = 10 * len(loader)
--    base_lr = args.batch_size / 256
--    if step < warmup_steps:
--        lr = base_lr * step / warmup_steps
--    else:
--        step -= warmup_steps
--        max_steps -= warmup_steps
--        q = 0.5 * (1 + math.cos(math.pi * step / max_steps))
--        end_lr = base_lr * 0.001
--        lr = base_lr * q + end_lr * (1 - q)
--    optimizer.param_groups[0]['lr'] = lr * args.learning_rate_weights
--    optimizer.param_groups[1]['lr'] = lr * args.learning_rate_biases
--
--
--def handle_sigusr1(signum, frame):
--    os.system(f'scontrol requeue {os.getenv("SLURM_JOB_ID")}')
--    exit()
--
--
--def handle_sigterm(signum, frame):
--    pass
--
--
--class LARS(optim.Optimizer):
--    def __init__(self, params, lr, weight_decay=0, momentum=0.9, eta=0.001,
--                 weight_decay_filter=False, lars_adaptation_filter=False):
--        defaults = dict(lr=lr, weight_decay=weight_decay, momentum=momentum,
--                        eta=eta, weight_decay_filter=weight_decay_filter,
--                        lars_adaptation_filter=lars_adaptation_filter)
--        super().__init__(params, defaults)
--
--
--    def exclude_bias_and_norm(self, p):
--        return p.ndim == 1
--
--    @torch.no_grad()
--    def step(self):
--        for g in self.param_groups:
--            for p in g['params']:
--                dp = p.grad
--
--                if dp is None:
--                    continue
--
--                if not g['weight_decay_filter'] or not self.exclude_bias_and_norm(p):
--                    dp = dp.add(p, alpha=g['weight_decay'])
--
--                if not g['lars_adaptation_filter'] or not self.exclude_bias_and_norm(p):
--                    param_norm = torch.norm(p)
--                    update_norm = torch.norm(dp)
--                    one = torch.ones_like(param_norm)
--                    q = torch.where(param_norm > 0.,
--                                    torch.where(update_norm > 0,
--                                                (g['eta'] * param_norm / update_norm), one), one)
--                    dp = dp.mul(q)
--
--                param_state = self.state[p]
--                if 'mu' not in param_state:
--                    param_state['mu'] = torch.zeros_like(p)
--                mu = param_state['mu']
--                mu.mul_(g['momentum']).add_(dp)
--
--                p.add_(mu, alpha=-g['lr'])
--
--
--if __name__ == '__main__':
--    try:  
--      main()
--    except KeyboardInterrupt:
--      print('Interrupted')
--      wandb.finish()
--      try:
--          sys.exit(0)
--      except SystemExit:
--          os._exit(0)
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml b/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/config.yaml b/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-deleted file mode 100644
-index 147470d..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/config.yaml
-+++ /dev/null
-@@ -1,90 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/barlow.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.0051
--epochs:
--  desc: null
--  value: 2
--lambd:
--  desc: null
--  value: 0.0051
--learning_rate_biases:
--  desc: null
--  value: 0.0048
--learning_rate_weights:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 3
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 100
--projector:
--  desc: null
--  value: 768-768
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-cased
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 20
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/output.log b/wandb/run-20220406_171518-s7zesus8/files/output.log
-deleted file mode 100644
-index 847ffbb..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/output.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--
--barlow.py --load 0
--Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Error in sys.excepthook:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 47, in getlines
--    return updatecache(filename, module_globals)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/linecache.py", line 136, in updatecache
--    with tokenize.open(fullname) as fp:
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/tokenize.py", line 447, in open
--    buffer = _builtin_open(filename, 'rb')
--KeyboardInterrupt
--Original exception was:
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt b/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-deleted file mode 100644
-index 5f93d29..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,21 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-06T11:45:20.215162",
--    "startedAt": "2022-04-06T11:45:18.613420",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_enhancement/barlow.py",
--    "codePath": "barlow.py",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json b/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log b/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-deleted file mode 100644
-index 0630656..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
-+++ /dev/null
-@@ -1,91 +0,0 @@
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,622 DEBUG   MainThread:16786 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: check_version
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send():179] send: header
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:18,626 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: check_version
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:19,155 DEBUG   SenderThread:16786 [sender.py:send():179] send: run
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:19,158 DEBUG   SenderThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 DEBUG   SenderThread:16786 [sender.py:send():179] send: summary
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:20,211 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: run_start
--2022-04-06 17:15:20,214 DEBUG   HandlerThread:16786 [meta.py:__init__():39] meta init
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:__init__():53] meta init done
--2022-04-06 17:15:20,215 DEBUG   HandlerThread:16786 [meta.py:probe():210] probe
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [git.py:repo():33] git repository is invalid
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():89] save code
--2022-04-06 17:15:20,220 DEBUG   HandlerThread:16786 [meta.py:_save_code():110] save code done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():57] save pip
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_pip():71] save pip done
--2022-04-06 17:15:20,221 DEBUG   HandlerThread:16786 [meta.py:_save_conda():78] save conda
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,240 DEBUG   HandlerThread:16786 [meta.py:_save_conda():86] save conda done
--2022-04-06 17:15:22,241 DEBUG   HandlerThread:16786 [meta.py:probe():252] probe done
--2022-04-06 17:15:22,255 DEBUG   SenderThread:16786 [sender.py:send():179] send: files
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 DEBUG   HandlerThread:16786 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-06 17:15:22,262 DEBUG   SenderThread:16786 [sender.py:send_request():193] send_request: stop_status
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/logs/debug.log b/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-deleted file mode 100644
-index 9769176..0000000
---- a/wandb/run-20220406_171518-s7zesus8/logs/debug.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/logs/debug-internal.log
--2022-04-06 17:15:18,614 INFO    MainThread:16786 [wandb_init.py:init():369] calling init triggers
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 20, 'epochs': 2, 'batch_size': 64, 'learning_rate_weights': 0.2, 'learning_rate_biases': 0.0048, 'weight_decay': 1e-06, 'lambd': 0.0051, 'clip': 1, 'projector': '768-768', 'print_freq': 100, 'dmodel': 768, 'nhead': 3, 'dfeedforward': 256, 'nlayers': 3, 'dropout': 0.0051, 'tokenizer': 'bert-base-multilingual-cased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-06 17:15:18,615 INFO    MainThread:16786 [wandb_init.py:init():418] starting backend
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():132] starting backend process...
--2022-04-06 17:15:18,619 INFO    MainThread:16786 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-06 17:15:18,620 INFO    wandb_internal:16786 [internal.py:wandb_internal():91] W&B internal server running at pid: 16786, started at: 2022-04-06 17:15:18.619828
--2022-04-06 17:15:18,620 INFO    MainThread:16786 [wandb_init.py:init():423] backend started and connected
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():465] updated telemetry
--2022-04-06 17:15:18,625 INFO    MainThread:16786 [wandb_init.py:init():484] communicating current version
--2022-04-06 17:15:18,626 INFO    WriterThread:16786 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.12 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-06 17:15:19,154 INFO    MainThread:16786 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:20,208 INFO    SenderThread:16786 [sender.py:_start_run_threads():707] run started: s7zesus8 with start time 1649245518
--2022-04-06 17:15:20,210 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-06 17:15:20,211 INFO    MainThread:16786 [wandb_init.py:init():522] starting run threads in backend
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:21,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code
--2022-04-06 17:15:22,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:22,255 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-06 17:15:22,256 INFO    SenderThread:16786 [sender.py:_save_file():829] saving file code/barlow.py with policy now
--2022-04-06 17:15:22,261 INFO    MainThread:16786 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-06 17:15:22,262 INFO    MainThread:16786 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-06 17:15:22,264 INFO    MainThread:16786 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-06 17:15:22,266 INFO    MainThread:16786 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-06 17:15:23,209 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:23,210 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json
--2022-04-06 17:15:23,555 INFO    Thread-14 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/2ggqgylt-wandb-metadata.json
--2022-04-06 17:15:23,635 INFO    Thread-17 :16786 [upload_job.py:push():133] Uploaded file /tmp/tmp8udrbs4mwandb/56j3ha1n-code/barlow.py
--2022-04-06 17:15:25,349 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:28,351 INFO    Thread-11 :16786 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [sender.py:finish():933] shutting down sender
--2022-04-06 17:15:29,273 INFO    WriterThread:16786 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
--2022-04-06 17:15:29,273 INFO    SenderThread:16786 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt requirements.txt
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-metadata.json wandb-metadata.json
--2022-04-06 17:15:29,351 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log output.log
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml conda-environment.yaml
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json wandb-summary.json
--2022-04-06 17:15:29,352 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml config.yaml
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/code/barlow.py code/barlow.py
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-06 17:15:29,354 INFO    SenderThread:16786 [file_pusher.py:join():181] waiting for file pusher
--2022-04-06 17:15:30,676 INFO    Thread-23 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/output.log
--2022-04-06 17:15:30,684 INFO    Thread-26 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/config.yaml
--2022-04-06 17:15:30,686 INFO    Thread-22 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/requirements.txt
--2022-04-06 17:15:30,694 INFO    Thread-24 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/conda-environment.yaml
--2022-04-06 17:15:30,730 INFO    Thread-25 :16786 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_enhancement/wandb/run-20220406_171518-s7zesus8/files/wandb-summary.json
--2022-04-06 17:15:31,674 ERROR   wandb_internal:16786 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-06 17:17:48,865 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,946 INFO    MainThread:16786 [wandb_run.py:_restore():1480] restore
--2022-04-06 17:17:48,947 INFO    MainThread:16786 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb b/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb
-deleted file mode 100644
-index cd7ebea..0000000
-Binary files a/wandb/run-20220406_171518-s7zesus8/run-s7zesus8.wandb and /dev/null differ
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py b/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml b/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-deleted file mode 100644
-index f15df21..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch b/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-deleted file mode 100644
-index 0ddeae0..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
-+++ /dev/null
-@@ -1,226 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2158287 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,87 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..ee4c0ff 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..29be718 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..bda663d 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145845-d3rkwo1k
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/output.log b/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-deleted file mode 100644
-index 4d74c7d..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt b/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-deleted file mode 100644
-index 9eb0f02..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:28:48.101605",
--    "startedAt": "2022-04-08T09:28:45.736549",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json b/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-deleted file mode 100644
-index 5708b15..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.139744758605957, "_runtime": 22, "_timestamp": 1649410147, "_step": 1, "epoch_loss": 7.139744758605957}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-deleted file mode 100644
-index e57e276..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
-+++ /dev/null
-@@ -1,74 +0,0 @@
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,745 DEBUG   MainThread:63630 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send():179] send: header
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:45,753 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:46,531 DEBUG   SenderThread:63630 [sender.py:send():179] send: run
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:48,099 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():39] meta init
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:__init__():53] meta init done
--2022-04-08 14:58:48,101 DEBUG   HandlerThread:63630 [meta.py:probe():210] probe
--2022-04-08 14:58:48,107 DEBUG   HandlerThread:63630 [meta.py:_setup_git():200] setup git
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:58:48,124 DEBUG   HandlerThread:63630 [meta.py:_save_code():89] save code
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_code():110] save code done
--2022-04-08 14:58:48,132 DEBUG   HandlerThread:63630 [meta.py:_save_patches():127] save patches
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():57] save pip
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:58:48,182 DEBUG   HandlerThread:63630 [meta.py:_save_conda():78] save conda
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:58:49,720 DEBUG   HandlerThread:63630 [meta.py:probe():252] probe done
--2022-04-08 14:58:49,727 DEBUG   SenderThread:63630 [sender.py:send():179] send: files
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,737 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:50,547 DEBUG   SenderThread:63630 [sender.py:send():179] send: config
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:05,549 DEBUG   HandlerThread:63630 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:05,549 DEBUG   SenderThread:63630 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:06,836 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: history
--2022-04-08 14:59:07,365 DEBUG   SenderThread:63630 [sender.py:send():179] send: summary
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log b/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-deleted file mode 100644
-index a6875c4..0000000
---- a/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'd3rkwo1k', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml', 'start_method': 'thread'}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/logs/debug-internal.log
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:58:45,737 INFO    MainThread:63630 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:45,738 INFO    MainThread:63630 [wandb_init.py:init():418] starting backend
--2022-04-08 14:58:45,743 INFO    MainThread:63630 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:58:45,744 INFO    wandb_internal:63630 [internal.py:wandb_internal():91] W&B internal server running at pid: 63630, started at: 2022-04-08 14:58:45.743405
--2022-04-08 14:58:45,744 INFO    MainThread:63630 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:58:45,745 INFO    MainThread:63630 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:58:45,746 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 512, 'epochs': 32, 'nhead': 6, 'nlayers': 4}
--2022-04-08 14:58:45,748 INFO    MainThread:63630 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:58:45,749 INFO    MainThread:63630 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:58:45,753 INFO    WriterThread:63630 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:58:46,531 INFO    MainThread:63630 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files
--2022-04-08 14:58:48,098 INFO    SenderThread:63630 [sender.py:_start_run_threads():707] run started: d3rkwo1k with start time 1649410125
--2022-04-08 14:58:48,098 INFO    MainThread:63630 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:58:48,099 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code/train_translation.py
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/diff.patch
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:58:49,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/requirements.txt
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:49,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/code
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:58:49,727 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:58:49,728 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:58:49,737 INFO    MainThread:63630 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:58:49,739 INFO    MainThread:63630 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:58:49,741 INFO    MainThread:63630 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/conda-environment.yaml
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:50,098 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-metadata.json
--2022-04-08 14:58:52,067 INFO    Thread-14 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2ocynek4-wandb-metadata.json
--2022-04-08 14:58:52,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:52,358 INFO    Thread-15 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2gxjwsey-code/train_translation.py
--2022-04-08 14:58:52,358 INFO    Thread-16 :63630 [upload_job.py:push():133] Uploaded file /tmp/tmpgr3njy6lwandb/2au0uu9d-diff.patch
--2022-04-08 14:58:54,099 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/config.yaml
--2022-04-08 14:58:56,100 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:58:58,133 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:00,168 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/output.log
--2022-04-08 14:59:06,838 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:07,169 INFO    Thread-11 :63630 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145845-d3rkwo1k/files/wandb-summary.json
--2022-04-08 14:59:07,365 INFO    SenderThread:63630 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb b/wandb/run-20220408_145845-d3rkwo1k/run-d3rkwo1k.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py b/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml b/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/config.yaml b/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-deleted file mode 100644
-index d5b49b7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 36
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/diff.patch b/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-deleted file mode 100644
-index 5bddede..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/diff.patch
-+++ /dev/null
-@@ -1,228 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..f7a973d 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,89 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..151b958 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..80b3468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145917-fjhaj183/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..abf5aa3 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145917-fjhaj183
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/output.log b/wandb/run-20220408_145917-fjhaj183/files/output.log
-deleted file mode 100644
-index ceeeb4b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt b/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-deleted file mode 100644
-index 705a1e7..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:18.659644",
--    "startedAt": "2022-04-08T09:29:17.328450",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=36",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json b/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-deleted file mode 100644
-index 1749cae..0000000
---- a/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140841484069824, "_runtime": 16, "_timestamp": 1649410173, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log b/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-deleted file mode 100644
-index 6a2ea0b..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,338 DEBUG   MainThread:63880 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send():179] send: header
--2022-04-08 14:59:17,342 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,342 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:17,943 DEBUG   SenderThread:63880 [sender.py:send():179] send: run
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:18,657 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():39] meta init
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:18,659 DEBUG   HandlerThread:63880 [meta.py:probe():210] probe
--2022-04-08 14:59:18,665 DEBUG   HandlerThread:63880 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:18,685 DEBUG   HandlerThread:63880 [meta.py:_save_code():89] save code
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:18,694 DEBUG   HandlerThread:63880 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:18,749 DEBUG   HandlerThread:63880 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:18,750 DEBUG   HandlerThread:63880 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:20,073 DEBUG   HandlerThread:63880 [meta.py:probe():252] probe done
--2022-04-08 14:59:20,075 DEBUG   SenderThread:63880 [sender.py:send():179] send: files
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 DEBUG   HandlerThread:63880 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,086 DEBUG   SenderThread:63880 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:20,978 DEBUG   SenderThread:63880 [sender.py:send():179] send: config
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: history
--2022-04-08 14:59:33,642 DEBUG   SenderThread:63880 [sender.py:send():179] send: summary
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/logs/debug.log b/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-deleted file mode 100644
-index 5f71fa1..0000000
---- a/wandb/run-20220408_145917-fjhaj183/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjhaj183', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjhaj183.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/logs/debug-internal.log
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 36, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:17,329 INFO    MainThread:63880 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:17,335 INFO    MainThread:63880 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:17,336 INFO    wandb_internal:63880 [internal.py:wandb_internal():91] W&B internal server running at pid: 63880, started at: 2022-04-08 14:59:17.335830
--2022-04-08 14:59:17,336 INFO    MainThread:63880 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:17,338 INFO    MainThread:63880 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:17,339 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 36, 'nhead': 4, 'nlayers': 4}
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:17,341 INFO    MainThread:63880 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:17,342 INFO    WriterThread:63880 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:17,942 INFO    MainThread:63880 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:18,597 INFO    MainThread:63880 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_start_run_threads():707] run started: fjhaj183 with start time 1649410157
--2022-04-08 14:59:18,657 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/diff.patch
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code/train_translation.py
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/requirements.txt
--2022-04-08 14:59:19,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
--2022-04-08 14:59:19,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/code
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:20,075 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:20,076 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:20,085 INFO    MainThread:63880 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:20,087 INFO    MainThread:63880 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:20,088 INFO    MainThread:63880 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:20,089 INFO    MainThread:63880 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:20,657 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/conda-environment.yaml
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-metadata.json
--2022-04-08 14:59:20,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:22,011 INFO    Thread-14 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/jylptjcp-wandb-metadata.json
--2022-04-08 14:59:22,139 INFO    Thread-16 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/1pe5aukq-diff.patch
--2022-04-08 14:59:22,375 INFO    Thread-15 :63880 [upload_job.py:push():133] Uploaded file /tmp/tmp9_iiwlg8wandb/20nxn48w-code/train_translation.py
--2022-04-08 14:59:22,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:23,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/config.yaml
--2022-04-08 14:59:24,658 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:26,659 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/output.log
--2022-04-08 14:59:33,644 INFO    SenderThread:63880 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:33,718 INFO    Thread-11 :63880 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145917-fjhaj183/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb b/wandb/run-20220408_145917-fjhaj183/run-fjhaj183.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py b/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml b/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml b/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-deleted file mode 100644
-index 39ea9ed..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 16
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch b/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-deleted file mode 100644
-index 3de404c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
-+++ /dev/null
-@@ -1,230 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..1036f20 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,91 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..33a9122 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..622b540 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c775116 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_145943-fjlzyv53
--\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/output.log b/wandb/run-20220408_145943-fjlzyv53/files/output.log
-deleted file mode 100644
-index 0a584f7..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt b/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-deleted file mode 100644
-index 321b5fe..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:29:44.714511",
--    "startedAt": "2022-04-08T09:29:43.530748",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=16",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json b/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-deleted file mode 100644
-index 43fa534..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.180241584777832, "_runtime": 16, "_timestamp": 1649410199, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-deleted file mode 100644
-index 1bb5ef6..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,540 DEBUG   MainThread:64131 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send():179] send: header
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,544 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: check_version
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:43,999 DEBUG   SenderThread:64131 [sender.py:send():179] send: run
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:44,712 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():39] meta init
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:__init__():53] meta init done
--2022-04-08 14:59:44,714 DEBUG   HandlerThread:64131 [meta.py:probe():210] probe
--2022-04-08 14:59:44,720 DEBUG   HandlerThread:64131 [meta.py:_setup_git():200] setup git
--2022-04-08 14:59:44,739 DEBUG   HandlerThread:64131 [meta.py:_setup_git():207] setup git done
--2022-04-08 14:59:44,740 DEBUG   HandlerThread:64131 [meta.py:_save_code():89] save code
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_code():110] save code done
--2022-04-08 14:59:44,748 DEBUG   HandlerThread:64131 [meta.py:_save_patches():127] save patches
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_patches():169] save patches done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():57] save pip
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_pip():71] save pip done
--2022-04-08 14:59:44,809 DEBUG   HandlerThread:64131 [meta.py:_save_conda():78] save conda
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:_save_conda():86] save conda done
--2022-04-08 14:59:46,120 DEBUG   HandlerThread:64131 [meta.py:probe():252] probe done
--2022-04-08 14:59:46,122 DEBUG   SenderThread:64131 [sender.py:send():179] send: files
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 DEBUG   HandlerThread:64131 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,133 DEBUG   SenderThread:64131 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,710 DEBUG   SenderThread:64131 [sender.py:send():179] send: config
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: history
--2022-04-08 14:59:59,111 DEBUG   SenderThread:64131 [sender.py:send():179] send: summary
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log b/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-deleted file mode 100644
-index 042323c..0000000
---- a/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fjlzyv53', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml', 'start_method': 'thread'}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug.log
--2022-04-08 14:59:43,531 INFO    MainThread:64131 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/logs/debug-internal.log
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():369] calling init triggers
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 1024, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:43,532 INFO    MainThread:64131 [wandb_init.py:init():418] starting backend
--2022-04-08 14:59:43,537 INFO    MainThread:64131 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 14:59:43,538 INFO    wandb_internal:64131 [internal.py:wandb_internal():91] W&B internal server running at pid: 64131, started at: 2022-04-08 14:59:43.537952
--2022-04-08 14:59:43,539 INFO    MainThread:64131 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 14:59:43,540 INFO    MainThread:64131 [wandb_init.py:init():423] backend started and connected
--2022-04-08 14:59:43,541 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 16, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 6, 'nlayers': 2}
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():465] updated telemetry
--2022-04-08 14:59:43,543 INFO    MainThread:64131 [wandb_init.py:init():484] communicating current version
--2022-04-08 14:59:43,544 INFO    WriterThread:64131 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 14:59:43,999 INFO    MainThread:64131 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files
--2022-04-08 14:59:44,710 INFO    SenderThread:64131 [sender.py:_start_run_threads():707] run started: fjlzyv53 with start time 1649410183
--2022-04-08 14:59:44,711 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:44,711 INFO    MainThread:64131 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code/train_translation.py
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/requirements.txt
--2022-04-08 14:59:45,711 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/diff.patch
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:45,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/code
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 14:59:46,122 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 14:59:46,123 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 14:59:46,133 INFO    MainThread:64131 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 14:59:46,135 INFO    MainThread:64131 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 14:59:46,137 INFO    MainThread:64131 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 14:59:46,712 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/conda-environment.yaml
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-metadata.json
--2022-04-08 14:59:46,713 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:47,796 INFO    Thread-14 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3fbo2hr0-wandb-metadata.json
--2022-04-08 14:59:47,797 INFO    Thread-16 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/pqn45v2p-diff.patch
--2022-04-08 14:59:47,800 INFO    Thread-15 :64131 [upload_job.py:push():133] Uploaded file /tmp/tmpuhuvd94zwandb/3862f493-code/train_translation.py
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/config.yaml
--2022-04-08 14:59:48,715 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:50,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:52,716 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/output.log
--2022-04-08 14:59:59,114 INFO    SenderThread:64131 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 14:59:59,769 INFO    Thread-11 :64131 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_145943-fjlzyv53/files/wandb-summary.json
-diff --git a/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb b/wandb/run-20220408_145943-fjlzyv53/run-fjlzyv53.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py b/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml b/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150006-abict4v2/files/config.yaml b/wandb/run-20220408_150006-abict4v2/files/config.yaml
-deleted file mode 100644
-index 55505a9..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 20
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 8
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150006-abict4v2/files/diff.patch b/wandb/run-20220408_150006-abict4v2/files/diff.patch
-deleted file mode 100644
-index cae01c4..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/diff.patch
-+++ /dev/null
-@@ -1,232 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..a79a795 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,93 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..baa82b6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..79d1f8d 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150006-abict4v2/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..4572147 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150006-abict4v2
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/output.log b/wandb/run-20220408_150006-abict4v2/files/output.log
-deleted file mode 100644
-index 18438a2..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/output.log
-+++ /dev/null
-@@ -1,14 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:261: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
-diff --git a/wandb/run-20220408_150006-abict4v2/files/requirements.txt b/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json b/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-deleted file mode 100644
-index f46fef8..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:08.569102",
--    "startedAt": "2022-04-08T09:30:06.988517",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=20",
--        "--nhead=8",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json b/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-deleted file mode 100644
-index 4c47552..0000000
---- a/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.120020389556885, "_runtime": 21, "_timestamp": 1649410227, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log b/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-deleted file mode 100644
-index eb4114e..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
-+++ /dev/null
-@@ -1,71 +0,0 @@
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,998 DEBUG   MainThread:64393 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send():179] send: header
--2022-04-08 15:00:07,002 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:07,002 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:07,447 DEBUG   SenderThread:64393 [sender.py:send():179] send: run
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,565 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:08,566 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:08,568 DEBUG   HandlerThread:64393 [meta.py:__init__():39] meta init
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:08,569 DEBUG   HandlerThread:64393 [meta.py:probe():210] probe
--2022-04-08 15:00:08,574 DEBUG   HandlerThread:64393 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:08,594 DEBUG   HandlerThread:64393 [meta.py:_save_code():89] save code
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:08,603 DEBUG   HandlerThread:64393 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:08,656 DEBUG   HandlerThread:64393 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:08,657 DEBUG   HandlerThread:64393 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:10,003 DEBUG   HandlerThread:64393 [meta.py:probe():252] probe done
--2022-04-08 15:00:10,005 DEBUG   SenderThread:64393 [sender.py:send():179] send: files
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:11,189 DEBUG   SenderThread:64393 [sender.py:send():179] send: config
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:26,191 DEBUG   HandlerThread:64393 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:26,191 DEBUG   SenderThread:64393 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: history
--2022-04-08 15:00:27,421 DEBUG   SenderThread:64393 [sender.py:send():179] send: summary
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/logs/debug.log b/wandb/run-20220408_150006-abict4v2/logs/debug.log
-deleted file mode 100644
-index 2782e5f..0000000
---- a/wandb/run-20220408_150006-abict4v2/logs/debug.log
-+++ /dev/null
-@@ -1,51 +0,0 @@
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'abict4v2', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-abict4v2.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/logs/debug-internal.log
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:06,989 INFO    MainThread:64393 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 20, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 8, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:06,990 INFO    MainThread:64393 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:06,995 INFO    MainThread:64393 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:06,996 INFO    wandb_internal:64393 [internal.py:wandb_internal():91] W&B internal server running at pid: 64393, started at: 2022-04-08 15:00:06.995764
--2022-04-08 15:00:06,996 INFO    MainThread:64393 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:06,997 INFO    MainThread:64393 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:06,999 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 20, 'nhead': 8, 'nlayers': 6}
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:07,001 INFO    MainThread:64393 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:07,002 INFO    WriterThread:64393 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:07,446 INFO    MainThread:64393 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files
--2022-04-08 15:00:08,564 INFO    SenderThread:64393 [sender.py:_start_run_threads():707] run started: abict4v2 with start time 1649410206
--2022-04-08 15:00:08,566 INFO    MainThread:64393 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:08,566 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:09,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/requirements.txt
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code/train_translation.py
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/diff.patch
--2022-04-08 15:00:09,567 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/code
--2022-04-08 15:00:10,005 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:10,006 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:10,007 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:10,014 INFO    MainThread:64393 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:10,015 INFO    MainThread:64393 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:10,018 INFO    MainThread:64393 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:10,019 INFO    MainThread:64393 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/conda-environment.yaml
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:10,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-metadata.json
--2022-04-08 15:00:12,363 INFO    Thread-14 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/166an6d7-wandb-metadata.json
--2022-04-08 15:00:12,365 INFO    Thread-20 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/1a4gpeq3-diff.patch
--2022-04-08 15:00:12,565 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:12,588 INFO    Thread-15 :64393 [upload_job.py:push():133] Uploaded file /tmp/tmplw_yhgi2wandb/2g7bx28s-code/train_translation.py
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:14,566 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/config.yaml
--2022-04-08 15:00:18,643 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:20,644 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/output.log
--2022-04-08 15:00:27,424 INFO    SenderThread:64393 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:27,647 INFO    Thread-11 :64393 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150006-abict4v2/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb b/wandb/run-20220408_150006-abict4v2/run-abict4v2.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py b/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-deleted file mode 100644
-index e482ba7..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,364 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            model.eval()
--            predicted=[]
--            target=[]
--            
--            for i in test_loader: 
--                src = i[0].cuda(gpu, non_blocking=True)
--                tgt_out = i[3].cuda(gpu, non_blocking=True)
--                num_tokens = src.shape[0]
--
--                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--                out = translate(model, src, tokenizer, src_mask, gpu)
--                predicted.append(out)
--                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--                try: 
--                    bleu_score(predicted, target)
--                except: 
--                    predicted.pop()
--                    target.pop()
--            
--            bleu_score = bleu_score(predicted, target)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml b/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml b/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-deleted file mode 100644
-index ea14f0e..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 64
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch b/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-deleted file mode 100644
-index 47b804f..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
-+++ /dev/null
-@@ -1,234 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2248477 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,95 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..e482ba7 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -299,7 +299,9 @@ def main_worker(gpu, args):
--                     predicted.pop()
--                     target.pop()
--             
---            print(bleu_score(predicted, target))
--+            bleu_score = bleu_score(predicted, target)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,7 +313,7 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..165ed2c 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..f1325dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..1413293 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_150037-ba0yl54z
--\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/output.log b/wandb/run-20220408_150037-ba0yl54z/files/output.log
-deleted file mode 100644
-index 6742216..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt b/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-deleted file mode 100644
-index 5a492ae..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T09:30:38.254663",
--    "startedAt": "2022-04-08T09:30:37.394479",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=64",
--        "--dfeedforward=512",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json b/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-deleted file mode 100644
-index 662ac89..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.082856178283691, "_runtime": 16, "_timestamp": 1649410253, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-deleted file mode 100644
-index 0c041a1..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,404 DEBUG   MainThread:64646 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 DEBUG   SenderThread:64646 [sender.py:send():179] send: header
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,410 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:00:37,410 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:37,611 DEBUG   SenderThread:64646 [sender.py:send():179] send: run
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:38,252 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():39] meta init
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:__init__():53] meta init done
--2022-04-08 15:00:38,254 DEBUG   HandlerThread:64646 [meta.py:probe():210] probe
--2022-04-08 15:00:38,260 DEBUG   HandlerThread:64646 [meta.py:_setup_git():200] setup git
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:00:38,280 DEBUG   HandlerThread:64646 [meta.py:_save_code():89] save code
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_code():110] save code done
--2022-04-08 15:00:38,289 DEBUG   HandlerThread:64646 [meta.py:_save_patches():127] save patches
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:00:38,341 DEBUG   HandlerThread:64646 [meta.py:_save_pip():57] save pip
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:00:38,342 DEBUG   HandlerThread:64646 [meta.py:_save_conda():78] save conda
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:00:39,663 DEBUG   HandlerThread:64646 [meta.py:probe():252] probe done
--2022-04-08 15:00:39,665 DEBUG   SenderThread:64646 [sender.py:send():179] send: files
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,676 DEBUG   HandlerThread:64646 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:00:39,676 DEBUG   SenderThread:64646 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:40,430 DEBUG   SenderThread:64646 [sender.py:send():179] send: config
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: history
--2022-04-08 15:00:53,735 DEBUG   SenderThread:64646 [sender.py:send():179] send: summary
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log b/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-deleted file mode 100644
-index 4346748..0000000
---- a/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
-+++ /dev/null
-@@ -1,50 +0,0 @@
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'lrpyor0l', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'ba0yl54z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml', 'start_method': 'thread'}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/logs/debug-internal.log
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 64, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 512, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:37,395 INFO    MainThread:64646 [wandb_init.py:init():418] starting backend
--2022-04-08 15:00:37,401 INFO    MainThread:64646 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:00:37,402 INFO    wandb_internal:64646 [internal.py:wandb_internal():91] W&B internal server running at pid: 64646, started at: 2022-04-08 15:00:37.401702
--2022-04-08 15:00:37,402 INFO    MainThread:64646 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:00:37,404 INFO    MainThread:64646 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:00:37,406 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 64, 'dfeedforward': 512, 'epochs': 32, 'nhead': 2, 'nlayers': 6}
--2022-04-08 15:00:37,408 INFO    MainThread:64646 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:00:37,409 INFO    MainThread:64646 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:00:37,409 INFO    WriterThread:64646 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:00:37,610 INFO    MainThread:64646 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:00:38,249 INFO    SenderThread:64646 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files
--2022-04-08 15:00:38,250 INFO    SenderThread:64646 [sender.py:_start_run_threads():707] run started: ba0yl54z with start time 1649410237
--2022-04-08 15:00:38,251 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:38,252 INFO    MainThread:64646 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/requirements.txt
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/diff.patch
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code/train_translation.py
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:39,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/code
--2022-04-08 15:00:39,665 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:00:39,666 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:00:39,667 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:00:39,676 INFO    MainThread:64646 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:00:39,678 INFO    MainThread:64646 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:00:39,680 INFO    MainThread:64646 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/conda-environment.yaml
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-metadata.json
--2022-04-08 15:00:40,250 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:41,110 INFO    Thread-16 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1bd5x3gn-diff.patch
--2022-04-08 15:00:41,186 INFO    Thread-15 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1kw8gilq-code/train_translation.py
--2022-04-08 15:00:41,285 INFO    Thread-14 :64646 [upload_job.py:push():133] Uploaded file /tmp/tmpy70agkq_wandb/1nmym46e-wandb-metadata.json
--2022-04-08 15:00:42,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:43,251 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/config.yaml
--2022-04-08 15:00:46,252 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:48,253 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/output.log
--2022-04-08 15:00:53,737 INFO    SenderThread:64646 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:00:54,255 INFO    Thread-11 :64646 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_150037-ba0yl54z/files/wandb-summary.json
-diff --git a/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb b/wandb/run-20220408_150037-ba0yl54z/run-ba0yl54z.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py b/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml b/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml b/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-deleted file mode 100644
-index 546bdaa..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 512
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 16
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch b/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-deleted file mode 100644
-index c98ba4e..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
-+++ /dev/null
-@@ -1,285 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ea51a40 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,97 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f8e98b2 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..9304e2b 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b02872b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153004-dg43ixc4
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/output.log b/wandb/run-20220408_153004-dg43ixc4/files/output.log
-deleted file mode 100644
-index f49019d..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/output.log
-+++ /dev/null
-@@ -1,11 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt b/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-deleted file mode 100644
-index 109e1b6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:05.796412",
--    "startedAt": "2022-04-08T10:00:04.837672",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=512",
--        "--epochs=16",
--        "--nhead=6",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json b/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-deleted file mode 100644
-index 09cdda6..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.140233993530273, "_runtime": 15, "_timestamp": 1649412019, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-deleted file mode 100644
-index 9669aaf..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,848 DEBUG   MainThread:65348 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,851 DEBUG   SenderThread:65348 [sender.py:send():179] send: header
--2022-04-08 15:30:04,851 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:04,852 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,022 DEBUG   SenderThread:65348 [sender.py:send():179] send: run
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:05,794 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():39] meta init
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:05,796 DEBUG   HandlerThread:65348 [meta.py:probe():210] probe
--2022-04-08 15:30:05,802 DEBUG   HandlerThread:65348 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:05,821 DEBUG   HandlerThread:65348 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:05,822 DEBUG   HandlerThread:65348 [meta.py:_save_code():89] save code
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:05,831 DEBUG   HandlerThread:65348 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:05,886 DEBUG   HandlerThread:65348 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:07,220 DEBUG   HandlerThread:65348 [meta.py:probe():252] probe done
--2022-04-08 15:30:07,221 DEBUG   SenderThread:65348 [sender.py:send():179] send: files
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,232 DEBUG   HandlerThread:65348 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:07,233 DEBUG   SenderThread:65348 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,677 DEBUG   SenderThread:65348 [sender.py:send():179] send: config
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: history
--2022-04-08 15:30:19,407 DEBUG   SenderThread:65348 [sender.py:send():179] send: summary
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log b/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-deleted file mode 100644
-index 66c14b1..0000000
---- a/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'dg43ixc4', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/logs/debug-internal.log
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 16, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 512, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:04,839 INFO    MainThread:65348 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:04,845 INFO    MainThread:65348 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:04,846 INFO    wandb_internal:65348 [internal.py:wandb_internal():91] W&B internal server running at pid: 65348, started at: 2022-04-08 15:30:04.845569
--2022-04-08 15:30:04,846 INFO    MainThread:65348 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:04,848 INFO    MainThread:65348 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:04,849 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 512, 'epochs': 16, 'nhead': 6, 'nlayers': 4}
--2022-04-08 15:30:04,850 INFO    MainThread:65348 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:04,851 INFO    MainThread:65348 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:04,852 INFO    WriterThread:65348 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:05,021 INFO    MainThread:65348 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files
--2022-04-08 15:30:05,792 INFO    SenderThread:65348 [sender.py:_start_run_threads():707] run started: dg43ixc4 with start time 1649412004
--2022-04-08 15:30:05,793 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:05,793 INFO    MainThread:65348 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code/train_translation.py
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/requirements.txt
--2022-04-08 15:30:06,794 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/diff.patch
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:06,795 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/code
--2022-04-08 15:30:07,222 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:07,223 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:07,232 INFO    MainThread:65348 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:07,234 INFO    MainThread:65348 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:07,235 INFO    MainThread:65348 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:07,236 INFO    MainThread:65348 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/conda-environment.yaml
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-metadata.json
--2022-04-08 15:30:07,792 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:08,525 INFO    Thread-16 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/npor673v-diff.patch
--2022-04-08 15:30:08,527 INFO    Thread-14 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/1fwboqq3-wandb-metadata.json
--2022-04-08 15:30:08,548 INFO    Thread-15 :65348 [upload_job.py:push():133] Uploaded file /tmp/tmpuu5pqhpgwandb/2pescb75-code/train_translation.py
--2022-04-08 15:30:09,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:09,943 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/config.yaml
--2022-04-08 15:30:11,936 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/output.log
--2022-04-08 15:30:19,409 INFO    SenderThread:65348 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:19,939 INFO    Thread-11 :65348 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153004-dg43ixc4/files/wandb-summary.json
-diff --git a/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb b/wandb/run-20220408_153004-dg43ixc4/run-dg43ixc4.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py b/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-deleted file mode 100644
-index 52a946e..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
-+++ /dev/null
-@@ -1,370 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    
--    for epoch in range(start_epoch, args.epochs):
--        sampler.set_epoch(epoch)
--        epoch_loss = 0 
--        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--            src = sent[0].cuda(gpu, non_blocking=True)
--            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--            tgt_out = sent[3].cuda(gpu, non_blocking=True)
--            
--            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--            
--            optimizer.zero_grad()
--
--            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--            loss.backward()
--
--            optimizer.step()
--            # losses += loss.item()
--            
--            wandb.log({'iter_loss': loss})
--            epoch_loss += loss.item()
--            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--            
--            if step % args.print_freq == 0:
--                if args.rank == 0:
--                    stats = dict(epoch=epoch, step=step,
--                                 loss=loss.item(),
--                                 time=int(time.time() - start_time))
--                    print(json.dumps(stats))
--                    print(json.dumps(stats), file=stats_file)
--        wandb.log({"epoch_loss":epoch_loss})
--        if args.rank == 0:
--            # save checkpoint
--            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                         optimizer=optimizer.state_dict())
--            # print(model.state_dict)
--            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--            print('translation model saved in', args.checkpoint_dir)
--        
--##############################################################
--        if epoch%args.checkbleu ==0 : 
--
--            bleu_score = checkbleu(test_loader, gpu)
--            wandb.log({'bleu_score': bleu_score}) 
--#            print(bleu_score(predicted, target))
--##############################################################
--#        if epoch%1 ==0 : 
--#            torch.save(model.module.state_dict(),
--#                   'path.pth')
--#            print("Model is saved")
--        # if args.rank == 0:
--        #     # save checkpoint
--        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--        #                  optimizer=optimizer.state_dict())
--        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--        #     print('saved translation model in', args.checkpoint_dir)
--    wandb.finish()
--           
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml b/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml b/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-deleted file mode 100644
-index 122f33a..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
-+++ /dev/null
-@@ -1,101 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 256
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 6
--nlayers:
--  desc: null
--  value: 2
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch b/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-deleted file mode 100644
-index 797f0a1..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
-+++ /dev/null
-@@ -1,287 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..356076f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,99 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..52a946e 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -143,9 +143,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -256,7 +256,7 @@ def main_worker(gpu, args):
--             optimizer.step()
--             # losses += loss.item()
--             
---#            wandb.log({'iter_loss': loss})
--+            wandb.log({'iter_loss': loss})
--             epoch_loss += loss.item()
--             torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--             
--@@ -267,7 +267,7 @@ def main_worker(gpu, args):
--                                  time=int(time.time() - start_time))
--                     print(json.dumps(stats))
--                     print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
--+        wandb.log({"epoch_loss":epoch_loss})
--         if args.rank == 0:
--             # save checkpoint
--             state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--@@ -279,27 +279,9 @@ def main_worker(gpu, args):
-- ##############################################################
--         if epoch%args.checkbleu ==0 : 
-- 
---            model.eval()
---            predicted=[]
---            target=[]
---            
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
---                
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
---            
---            print(bleu_score(predicted, target))
--+            bleu_score = checkbleu(test_loader, gpu)
--+            wandb.log({'bleu_score': bleu_score}) 
--+#            print(bleu_score(predicted, target))
-- ##############################################################
-- #        if epoch%1 ==0 : 
-- #            torch.save(model.module.state_dict(),
--@@ -311,10 +293,36 @@ def main_worker(gpu, args):
--         #                  optimizer=optimizer.state_dict())
--         #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--         #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
--+    wandb.finish()
--            
-- 
-- 
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--+            
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+                
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--+            
--+        bleu_score = bleu_score(predicted, target)
--+
--+    return bleu_score
--+
-- '''
-- todo: 
--     BLEU score
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7b452fc 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..48b2ecd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..93be230 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220408_153027-fwwd5rya
--\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/output.log b/wandb/run-20220408_153027-fwwd5rya/files/output.log
-deleted file mode 100644
-index e86aeca..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-17:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt b/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-deleted file mode 100644
-index dcac75d..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-08T10:00:27.794832",
--    "startedAt": "2022-04-08T10:00:27.031889",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=256",
--        "--dfeedforward=256",
--        "--epochs=40",
--        "--nhead=6",
--        "--nlayers=2"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json b/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-deleted file mode 100644
-index e70a2b8..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
-+++ /dev/null
-@@ -1,99 +0,0 @@
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,040 DEBUG   MainThread:65601 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,046 DEBUG   SenderThread:65601 [sender.py:send():179] send: header
--2022-04-08 15:30:27,046 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: check_version
--2022-04-08 15:30:27,047 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: check_version
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,254 DEBUG   SenderThread:65601 [sender.py:send():179] send: run
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: summary
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:27,792 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: run_start
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():39] meta init
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:__init__():53] meta init done
--2022-04-08 15:30:27,794 DEBUG   HandlerThread:65601 [meta.py:probe():210] probe
--2022-04-08 15:30:27,800 DEBUG   HandlerThread:65601 [meta.py:_setup_git():200] setup git
--2022-04-08 15:30:27,819 DEBUG   HandlerThread:65601 [meta.py:_setup_git():207] setup git done
--2022-04-08 15:30:27,820 DEBUG   HandlerThread:65601 [meta.py:_save_code():89] save code
--2022-04-08 15:30:27,828 DEBUG   HandlerThread:65601 [meta.py:_save_code():110] save code done
--2022-04-08 15:30:27,829 DEBUG   HandlerThread:65601 [meta.py:_save_patches():127] save patches
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_patches():169] save patches done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():57] save pip
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_pip():71] save pip done
--2022-04-08 15:30:27,882 DEBUG   HandlerThread:65601 [meta.py:_save_conda():78] save conda
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:_save_conda():86] save conda done
--2022-04-08 15:30:29,200 DEBUG   HandlerThread:65601 [meta.py:probe():252] probe done
--2022-04-08 15:30:29,202 DEBUG   SenderThread:65601 [sender.py:send():179] send: files
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 DEBUG   HandlerThread:65601 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-08 15:30:29,214 DEBUG   SenderThread:65601 [sender.py:send_request():193] send_request: stop_status
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,791 DEBUG   SenderThread:65601 [sender.py:send():179] send: config
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log b/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-deleted file mode 100644
-index 987c5d6..0000000
---- a/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
-+++ /dev/null
-@@ -1,84 +0,0 @@
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': 'q27ijx1y', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'fwwd5rya', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml', 'start_method': 'thread'}
--2022-04-08 15:30:27,032 INFO    MainThread:65601 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/logs/debug-internal.log
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():369] calling init triggers
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 256, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 6, 'dfeedforward': 256, 'nlayers': 2, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:27,033 INFO    MainThread:65601 [wandb_init.py:init():418] starting backend
--2022-04-08 15:30:27,038 INFO    MainThread:65601 [backend.py:ensure_launched():132] starting backend process...
--2022-04-08 15:30:27,039 INFO    MainThread:65601 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-08 15:30:27,040 INFO    wandb_internal:65601 [internal.py:wandb_internal():91] W&B internal server running at pid: 65601, started at: 2022-04-08 15:30:27.039181
--2022-04-08 15:30:27,040 INFO    MainThread:65601 [wandb_init.py:init():423] backend started and connected
--2022-04-08 15:30:27,043 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 256, 'dfeedforward': 256, 'epochs': 40, 'nhead': 6, 'nlayers': 2}
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():465] updated telemetry
--2022-04-08 15:30:27,045 INFO    MainThread:65601 [wandb_init.py:init():484] communicating current version
--2022-04-08 15:30:27,046 INFO    WriterThread:65601 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:27,253 INFO    MainThread:65601 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.13 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-08 15:30:27,254 INFO    MainThread:65601 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:27,789 INFO    SenderThread:65601 [sender.py:_start_run_threads():707] run started: fwwd5rya with start time 1649412027
--2022-04-08 15:30:27,791 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-08 15:30:27,792 INFO    MainThread:65601 [wandb_init.py:init():522] starting run threads in backend
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:28,791 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch
--2022-04-08 15:30:28,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code
--2022-04-08 15:30:29,202 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-08 15:30:29,203 INFO    SenderThread:65601 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-08 15:30:29,213 INFO    MainThread:65601 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-08 15:30:29,214 INFO    MainThread:65601 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-08 15:30:29,215 INFO    MainThread:65601 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-08 15:30:29,216 INFO    MainThread:65601 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-08 15:30:29,218 INFO    MainThread:65601 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-08 15:30:29,792 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json
--2022-04-08 15:30:29,793 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:30,468 INFO    Thread-14 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/wm4wxh62-wandb-metadata.json
--2022-04-08 15:30:30,483 INFO    Thread-15 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/12sn1grf-code/train_translation.py
--2022-04-08 15:30:30,586 INFO    Thread-16 :65601 [upload_job.py:push():133] Uploaded file /tmp/tmp_5d66la0wandb/1yya4rls-diff.patch
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:31,796 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:33,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:35,797 INFO    Thread-11 :65601 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:36,051 WARNING wandb_internal:65601 [internal.py:is_dead():367] Internal process exiting, parent pid 65592 disappeared
--2022-04-08 15:30:36,051 ERROR   wandb_internal:65601 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-08 15:30:36,225 INFO    WriterThread:65601 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [sender.py:finish():933] shutting down sender
--2022-04-08 15:30:36,225 INFO    SenderThread:65601 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt requirements.txt
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-metadata.json wandb-metadata.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log output.log
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml conda-environment.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json wandb-summary.json
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml config.yaml
--2022-04-08 15:30:36,798 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/diff.patch diff.patch
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/code/train_translation.py code/train_translation.py
--2022-04-08 15:30:36,800 INFO    SenderThread:65601 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-08 15:30:36,801 INFO    SenderThread:65601 [file_pusher.py:join():181] waiting for file pusher
--2022-04-08 15:30:38,053 INFO    Thread-27 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/config.yaml
--2022-04-08 15:30:38,054 INFO    Thread-25 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/conda-environment.yaml
--2022-04-08 15:30:38,246 INFO    Thread-23 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/requirements.txt
--2022-04-08 15:30:38,247 INFO    Thread-24 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/output.log
--2022-04-08 15:30:38,687 INFO    Thread-26 :65601 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220408_153027-fwwd5rya/files/wandb-summary.json
--2022-04-08 15:30:40,967 ERROR   wandb_internal:65601 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
-diff --git a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb b/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb
-deleted file mode 100644
-index bfb12ff..0000000
-Binary files a/wandb/run-20220408_153027-fwwd5rya/run-fwwd5rya.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py b/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml b/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml b/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch b/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-deleted file mode 100644
-index bd71761..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
-+++ /dev/null
-@@ -1,377 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..d3a775c 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,100 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..74ec524 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..c957937 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..287708f 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152616-3a3gw94y
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/output.log b/wandb/run-20220409_152616-3a3gw94y/files/output.log
-deleted file mode 100644
-index 13e9c3e..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt b/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-deleted file mode 100644
-index 20f0482..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:56:17.429229",
--    "startedAt": "2022-04-09T09:56:16.815816",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json b/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-deleted file mode 100644
-index 5602f92..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 16, "_timestamp": 1649498192, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-deleted file mode 100644
-index 2546fd3..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,824 DEBUG   MainThread:3266 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,828 DEBUG   SenderThread:3266 [sender.py:send():179] send: header
--2022-04-09 15:26:16,829 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:16,984 DEBUG   SenderThread:3266 [sender.py:send():179] send: run
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:17,426 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():39] meta init
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:__init__():53] meta init done
--2022-04-09 15:26:17,429 DEBUG   HandlerThread:3266 [meta.py:probe():210] probe
--2022-04-09 15:26:17,435 DEBUG   HandlerThread:3266 [meta.py:_setup_git():200] setup git
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:26:17,450 DEBUG   HandlerThread:3266 [meta.py:_save_code():89] save code
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_code():110] save code done
--2022-04-09 15:26:17,456 DEBUG   HandlerThread:3266 [meta.py:_save_patches():127] save patches
--2022-04-09 15:26:17,564 DEBUG   HandlerThread:3266 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:26:17,565 DEBUG   HandlerThread:3266 [meta.py:_save_pip():57] save pip
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:26:17,566 DEBUG   HandlerThread:3266 [meta.py:_save_conda():78] save conda
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:26:19,487 DEBUG   HandlerThread:3266 [meta.py:probe():252] probe done
--2022-04-09 15:26:19,491 DEBUG   SenderThread:3266 [sender.py:send():179] send: files
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 DEBUG   HandlerThread:3266 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:26:19,497 DEBUG   SenderThread:3266 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:19,831 DEBUG   SenderThread:3266 [sender.py:send():179] send: config
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: history
--2022-04-09 15:26:32,511 DEBUG   SenderThread:3266 [sender.py:send():179] send: summary
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log b/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-deleted file mode 100644
-index ebbf034..0000000
---- a/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/logs/debug-internal.log
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:16,817 INFO    MainThread:3266 [wandb_init.py:init():418] starting backend
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:26:16,822 INFO    MainThread:3266 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:26:16,823 INFO    wandb_internal:3266 [internal.py:wandb_internal():91] W&B internal server running at pid: 3266, started at: 2022-04-09 15:26:16.822572
--2022-04-09 15:26:16,823 INFO    MainThread:3266 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:26:16,827 INFO    MainThread:3266 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:26:16,828 INFO    WriterThread:3266 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
--2022-04-09 15:26:16,980 INFO    MainThread:3266 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:26:16,981 INFO    MainThread:3266 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files
--2022-04-09 15:26:17,424 INFO    SenderThread:3266 [sender.py:_start_run_threads():707] run started: 3a3gw94y with start time 1649498176
--2022-04-09 15:26:17,425 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:17,426 INFO    MainThread:3266 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code/train_translation.py
--2022-04-09 15:26:18,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/requirements.txt
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/diff.patch
--2022-04-09 15:26:18,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/code
--2022-04-09 15:26:19,424 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/conda-environment.yaml
--2022-04-09 15:26:19,491 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:26:19,492 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:26:19,497 INFO    MainThread:3266 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:26:19,501 INFO    MainThread:3266 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:26:19,502 INFO    MainThread:3266 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:26:19,505 INFO    MainThread:3266 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:20,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-metadata.json
--2022-04-09 15:26:20,885 INFO    Thread-14 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1te7qq4j-wandb-metadata.json
--2022-04-09 15:26:20,887 INFO    Thread-22 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/tiwzm18e-diff.patch
--2022-04-09 15:26:20,888 INFO    Thread-17 :3266 [upload_job.py:push():133] Uploaded file /tmp/tmpegocdq1xwandb/1x2d20v2-code/train_translation.py
--2022-04-09 15:26:21,425 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/config.yaml
--2022-04-09 15:26:22,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:24,426 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:26,427 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/output.log
--2022-04-09 15:26:32,514 INFO    SenderThread:3266 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:26:33,430 INFO    Thread-11 :3266 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152616-3a3gw94y/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb b/wandb/run-20220409_152616-3a3gw94y/run-3a3gw94y.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py b/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-deleted file mode 100644
-index 197ab25..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu_score = bleu_score(predicted, target)
--
--    return bleu_score
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml b/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml b/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch b/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-deleted file mode 100644
-index c3ed101..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
-+++ /dev/null
-@@ -1,379 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..ed88fe4 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,102 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..197ab25 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu_score = bleu_score(predicted, target)
-- 
--+    return bleu_score
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..4895794 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..1f9d48c 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..dfe2dcb 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_152708-15jgzcwp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/output.log b/wandb/run-20220409_152708-15jgzcwp/files/output.log
-deleted file mode 100644
-index 9a9a49f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt b/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-deleted file mode 100644
-index abaad7d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T09:57:09.613679",
--    "startedAt": "2022-04-09T09:57:08.966939",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json b/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-deleted file mode 100644
-index 0164a0d..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 12, "_timestamp": 1649498241, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-deleted file mode 100644
-index de7918e..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,002 DEBUG   MainThread:3540 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,017 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send():179] send: header
--2022-04-09 15:27:09,018 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: check_version
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,109 DEBUG   SenderThread:3540 [sender.py:send():179] send: run
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:09,611 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():39] meta init
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:__init__():53] meta init done
--2022-04-09 15:27:09,613 DEBUG   HandlerThread:3540 [meta.py:probe():210] probe
--2022-04-09 15:27:09,619 DEBUG   HandlerThread:3540 [meta.py:_setup_git():200] setup git
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_setup_git():207] setup git done
--2022-04-09 15:27:09,636 DEBUG   HandlerThread:3540 [meta.py:_save_code():89] save code
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_code():110] save code done
--2022-04-09 15:27:09,644 DEBUG   HandlerThread:3540 [meta.py:_save_patches():127] save patches
--2022-04-09 15:27:09,693 DEBUG   HandlerThread:3540 [meta.py:_save_patches():169] save patches done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():57] save pip
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_pip():71] save pip done
--2022-04-09 15:27:09,694 DEBUG   HandlerThread:3540 [meta.py:_save_conda():78] save conda
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,002 DEBUG   HandlerThread:3540 [meta.py:_save_conda():86] save conda done
--2022-04-09 15:27:11,003 DEBUG   HandlerThread:3540 [meta.py:probe():252] probe done
--2022-04-09 15:27:11,004 DEBUG   SenderThread:3540 [sender.py:send():179] send: files
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 DEBUG   HandlerThread:3540 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 DEBUG   SenderThread:3540 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,362 DEBUG   SenderThread:3540 [sender.py:send():179] send: config
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: history
--2022-04-09 15:27:21,558 DEBUG   SenderThread:3540 [sender.py:send():179] send: summary
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log b/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-deleted file mode 100644
-index 023162f..0000000
---- a/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 15:27:08,971 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug.log
--2022-04-09 15:27:08,972 INFO    MainThread:3540 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/logs/debug-internal.log
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():369] calling init triggers
--2022-04-09 15:27:08,973 INFO    MainThread:3540 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:08,974 INFO    MainThread:3540 [wandb_init.py:init():418] starting backend
--2022-04-09 15:27:08,994 INFO    MainThread:3540 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 15:27:08,996 INFO    MainThread:3540 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 15:27:08,998 INFO    wandb_internal:3540 [internal.py:wandb_internal():91] W&B internal server running at pid: 3540, started at: 2022-04-09 15:27:08.995965
--2022-04-09 15:27:09,002 INFO    MainThread:3540 [wandb_init.py:init():423] backend started and connected
--2022-04-09 15:27:09,013 INFO    MainThread:3540 [wandb_init.py:init():465] updated telemetry
--2022-04-09 15:27:09,014 INFO    MainThread:3540 [wandb_init.py:init():484] communicating current version
--2022-04-09 15:27:09,016 INFO    WriterThread:3540 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 15:27:09,107 INFO    MainThread:3540 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files
--2022-04-09 15:27:09,608 INFO    SenderThread:3540 [sender.py:_start_run_threads():707] run started: 15jgzcwp with start time 1649498229
--2022-04-09 15:27:09,610 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:09,610 INFO    MainThread:3540 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/requirements.txt
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code/train_translation.py
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/diff.patch
--2022-04-09 15:27:10,609 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/code
--2022-04-09 15:27:11,004 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 15:27:11,005 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 15:27:11,006 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 15:27:11,013 INFO    MainThread:3540 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 15:27:11,015 INFO    MainThread:3540 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 15:27:11,017 INFO    MainThread:3540 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 15:27:11,018 INFO    MainThread:3540 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/conda-environment.yaml
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:11,608 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-metadata.json
--2022-04-09 15:27:11,957 INFO    Thread-18 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/r7pplw70-diff.patch
--2022-04-09 15:27:12,433 INFO    Thread-15 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/2g6gfxwx-code/train_translation.py
--2022-04-09 15:27:12,434 INFO    Thread-14 :3540 [upload_job.py:push():133] Uploaded file /tmp/tmp9sk6_xjuwandb/1mjjo7ai-wandb-metadata.json
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:13,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/config.yaml
--2022-04-09 15:27:15,610 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:17,611 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/output.log
--2022-04-09 15:27:21,560 INFO    SenderThread:3540 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 15:27:21,613 INFO    Thread-11 :3540 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_152708-15jgzcwp/files/wandb-summary.json
-diff --git a/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb b/wandb/run-20220409_152708-15jgzcwp/run-15jgzcwp.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py b/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-deleted file mode 100644
-index 596bd8d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml b/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch b/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-deleted file mode 100644
-index edba74d..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
-+++ /dev/null
-@@ -1,457 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..6f7f3e6 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,180 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..596bd8d 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..7064436 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..3ee4416 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..425ec98 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160115-yr1wk5mi
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/output.log b/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-deleted file mode 100644
-index e872735..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/output.log
-+++ /dev/null
-@@ -1,6 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt b/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-deleted file mode 100644
-index 39bdbe7..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:31:16.739157",
--    "startedAt": "2022-04-09T10:31:15.626079",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json b/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-deleted file mode 100644
-index 96a4906..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"iter_loss": 7.142178058624268, "_runtime": 14, "_timestamp": 1649500289, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-deleted file mode 100644
-index 2dc7db1..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
-+++ /dev/null
-@@ -1,66 +0,0 @@
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,660 DEBUG   MainThread:6109 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 DEBUG   SenderThread:6109 [sender.py:send():179] send: header
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,673 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:01:15,673 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:15,970 DEBUG   SenderThread:6109 [sender.py:send():179] send: run
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:16,736 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():39] meta init
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:__init__():53] meta init done
--2022-04-09 16:01:16,739 DEBUG   HandlerThread:6109 [meta.py:probe():210] probe
--2022-04-09 16:01:16,745 DEBUG   HandlerThread:6109 [meta.py:_setup_git():200] setup git
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:01:16,762 DEBUG   HandlerThread:6109 [meta.py:_save_code():89] save code
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_code():110] save code done
--2022-04-09 16:01:16,769 DEBUG   HandlerThread:6109 [meta.py:_save_patches():127] save patches
--2022-04-09 16:01:16,811 DEBUG   HandlerThread:6109 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():57] save pip
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:01:16,812 DEBUG   HandlerThread:6109 [meta.py:_save_conda():78] save conda
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:01:18,148 DEBUG   HandlerThread:6109 [meta.py:probe():252] probe done
--2022-04-09 16:01:18,150 DEBUG   SenderThread:6109 [sender.py:send():179] send: files
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,158 DEBUG   HandlerThread:6109 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:01:18,158 DEBUG   SenderThread:6109 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,709 DEBUG   SenderThread:6109 [sender.py:send():179] send: config
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: history
--2022-04-09 16:01:29,848 DEBUG   SenderThread:6109 [sender.py:send():179] send: summary
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log b/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-deleted file mode 100644
-index 87f5666..0000000
---- a/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
-+++ /dev/null
-@@ -1,49 +0,0 @@
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:01:15,631 INFO    MainThread:6109 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug.log
--2022-04-09 16:01:15,632 INFO    MainThread:6109 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/logs/debug-internal.log
--2022-04-09 16:01:15,633 INFO    MainThread:6109 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:15,634 INFO    MainThread:6109 [wandb_init.py:init():418] starting backend
--2022-04-09 16:01:15,655 INFO    MainThread:6109 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:01:15,656 INFO    MainThread:6109 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:01:15,658 INFO    wandb_internal:6109 [internal.py:wandb_internal():91] W&B internal server running at pid: 6109, started at: 2022-04-09 16:01:15.656065
--2022-04-09 16:01:15,659 INFO    MainThread:6109 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:01:15,670 INFO    MainThread:6109 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:01:15,672 INFO    WriterThread:6109 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:01:15,966 INFO    MainThread:6109 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:01:16,733 INFO    SenderThread:6109 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files
--2022-04-09 16:01:16,734 INFO    SenderThread:6109 [sender.py:_start_run_threads():707] run started: yr1wk5mi with start time 1649500275
--2022-04-09 16:01:16,735 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:16,736 INFO    MainThread:6109 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/requirements.txt
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/diff.patch
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code/train_translation.py
--2022-04-09 16:01:17,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/code
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:01:18,150 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:01:18,151 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:01:18,160 INFO    MainThread:6109 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:01:18,162 INFO    MainThread:6109 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:01:18,163 INFO    MainThread:6109 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:01:18,164 INFO    MainThread:6109 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/conda-environment.yaml
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-metadata.json
--2022-04-09 16:01:18,734 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:19,843 INFO    Thread-14 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/3aqderx8-wandb-metadata.json
--2022-04-09 16:01:19,846 INFO    Thread-15 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/4nx7fbcb-code/train_translation.py
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:20,735 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/config.yaml
--2022-04-09 16:01:20,845 INFO    Thread-18 :6109 [upload_job.py:push():133] Uploaded file /tmp/tmpguz2ugxewandb/35j9ij83-diff.patch
--2022-04-09 16:01:22,918 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:24,920 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/output.log
--2022-04-09 16:01:29,851 INFO    SenderThread:6109 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:01:29,923 INFO    Thread-11 :6109 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160115-yr1wk5mi/files/wandb-summary.json
-diff --git a/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb b/wandb/run-20220409_160115-yr1wk5mi/run-yr1wk5mi.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py b/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-deleted file mode 100644
-index feaf1fc..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
-+++ /dev/null
-@@ -1,377 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            wandb.log({"epoch_loss":epoch_loss})
--            if args.rank == 0:
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml b/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch b/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-deleted file mode 100644
-index eec0ab3..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
-+++ /dev/null
-@@ -1,459 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..8b42533 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,182 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..feaf1fc 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,97 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            wandb.log({"epoch_loss":epoch_loss})
--+            if args.rank == 0:
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..e712296 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b2fc627 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..337b531 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160246-2bmbfqcy
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/output.log b/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-deleted file mode 100644
-index e15e9a4..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/output.log
-+++ /dev/null
-@@ -1,17 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt b/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-deleted file mode 100644
-index f4efc7b..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:32:47.190940",
--    "startedAt": "2022-04-09T10:32:46.030719",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json b/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-deleted file mode 100644
-index 59ceedf..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 18, "_timestamp": 1649500384, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-deleted file mode 100644
-index 4dae842..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
-+++ /dev/null
-@@ -1,68 +0,0 @@
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,040 DEBUG   MainThread:6410 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send():179] send: header
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,043 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:46,151 DEBUG   SenderThread:6410 [sender.py:send():179] send: run
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:47,188 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():39] meta init
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:__init__():53] meta init done
--2022-04-09 16:02:47,190 DEBUG   HandlerThread:6410 [meta.py:probe():210] probe
--2022-04-09 16:02:47,197 DEBUG   HandlerThread:6410 [meta.py:_setup_git():200] setup git
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:02:47,216 DEBUG   HandlerThread:6410 [meta.py:_save_code():89] save code
--2022-04-09 16:02:47,224 DEBUG   HandlerThread:6410 [meta.py:_save_code():110] save code done
--2022-04-09 16:02:47,225 DEBUG   HandlerThread:6410 [meta.py:_save_patches():127] save patches
--2022-04-09 16:02:47,270 DEBUG   HandlerThread:6410 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():57] save pip
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:02:47,271 DEBUG   HandlerThread:6410 [meta.py:_save_conda():78] save conda
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:02:48,637 DEBUG   HandlerThread:6410 [meta.py:probe():252] probe done
--2022-04-09 16:02:48,639 DEBUG   SenderThread:6410 [sender.py:send():179] send: files
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,649 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:49,267 DEBUG   SenderThread:6410 [sender.py:send():179] send: config
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,268 DEBUG   HandlerThread:6410 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:03:04,269 DEBUG   SenderThread:6410 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:03:04,791 DEBUG   SenderThread:6410 [sender.py:send():179] send: history
--2022-04-09 16:03:04,792 DEBUG   SenderThread:6410 [sender.py:send():179] send: summary
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log b/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-deleted file mode 100644
-index c4edd31..0000000
---- a/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug.log
--2022-04-09 16:02:46,031 INFO    MainThread:6410 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/logs/debug-internal.log
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:46,032 INFO    MainThread:6410 [wandb_init.py:init():418] starting backend
--2022-04-09 16:02:46,037 INFO    MainThread:6410 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:02:46,038 INFO    wandb_internal:6410 [internal.py:wandb_internal():91] W&B internal server running at pid: 6410, started at: 2022-04-09 16:02:46.037354
--2022-04-09 16:02:46,038 INFO    MainThread:6410 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:02:46,039 INFO    MainThread:6410 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:02:46,042 INFO    MainThread:6410 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:02:46,043 INFO    WriterThread:6410 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
--2022-04-09 16:02:46,147 INFO    MainThread:6410 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:02:46,148 INFO    MainThread:6410 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files
--2022-04-09 16:02:47,185 INFO    SenderThread:6410 [sender.py:_start_run_threads():707] run started: 2bmbfqcy with start time 1649500366
--2022-04-09 16:02:47,187 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:02:47,188 INFO    MainThread:6410 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:02:48,186 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-summary.json
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/diff.patch
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/requirements.txt
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code/train_translation.py
--2022-04-09 16:02:48,187 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/code
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:02:48,639 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:02:48,640 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:02:48,649 INFO    MainThread:6410 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:02:48,651 INFO    MainThread:6410 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:02:48,653 INFO    MainThread:6410 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:02:49,195 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/conda-environment.yaml
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/wandb-metadata.json
--2022-04-09 16:02:49,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:50,751 INFO    Thread-16 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/8jmqqlw3-diff.patch
--2022-04-09 16:02:50,752 INFO    Thread-14 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/162ca126-wandb-metadata.json
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/config.yaml
--2022-04-09 16:02:51,196 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:02:51,759 INFO    Thread-15 :6410 [upload_job.py:push():133] Uploaded file /tmp/tmphv1ed_ldwandb/19onurwq-code/train_translation.py
--2022-04-09 16:02:55,197 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:03,207 INFO    Thread-11 :6410 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160246-2bmbfqcy/files/output.log
--2022-04-09 16:03:04,798 INFO    SenderThread:6410 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-diff --git a/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb b/wandb/run-20220409_160246-2bmbfqcy/run-2bmbfqcy.wandb
-deleted file mode 100644
-index e69de29..0000000
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py b/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-deleted file mode 100644
-index 182fd97..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
-+++ /dev/null
-@@ -1,378 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if epoch%args.checkbleu ==0 : 
--
--                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml b/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-deleted file mode 100644
-index a0e0750..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch b/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-deleted file mode 100644
-index 2c51f6a..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
-+++ /dev/null
-@@ -1,470 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..507a499 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,192 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..182fd97 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,98 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if epoch%args.checkbleu ==0 : 
--+
--+                bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..2224b92 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..94d02b9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f7361e5 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160409-1qxpwcwj
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/output.log b/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-deleted file mode 100644
-index 35bceac..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-+++ /dev/null
-@@ -1,18 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt b/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-deleted file mode 100644
-index 440569b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:34:10.122598",
--    "startedAt": "2022-04-09T10:34:09.149412",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json b/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-deleted file mode 100644
-index 52da06b..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 4649.924870014191, "_runtime": 27, "_timestamp": 1649500476, "_step": 0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-deleted file mode 100644
-index bf89eff..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
-+++ /dev/null
-@@ -1,78 +0,0 @@
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,159 DEBUG   MainThread:6703 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send():179] send: header
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,163 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:09,250 DEBUG   SenderThread:6703 [sender.py:send():179] send: run
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:10,119 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():39] meta init
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:__init__():53] meta init done
--2022-04-09 16:04:10,122 DEBUG   HandlerThread:6703 [meta.py:probe():210] probe
--2022-04-09 16:04:10,130 DEBUG   HandlerThread:6703 [meta.py:_setup_git():200] setup git
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:04:10,195 DEBUG   HandlerThread:6703 [meta.py:_save_code():89] save code
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_code():110] save code done
--2022-04-09 16:04:10,211 DEBUG   HandlerThread:6703 [meta.py:_save_patches():127] save patches
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:04:10,306 DEBUG   HandlerThread:6703 [meta.py:_save_pip():57] save pip
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:04:10,307 DEBUG   HandlerThread:6703 [meta.py:_save_conda():78] save conda
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:04:11,657 DEBUG   HandlerThread:6703 [meta.py:probe():252] probe done
--2022-04-09 16:04:11,658 DEBUG   SenderThread:6703 [sender.py:send():179] send: files
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,667 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:11,669 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:12,396 DEBUG   SenderThread:6703 [sender.py:send():179] send: config
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:27,397 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:27,397 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: history
--2022-04-09 16:04:36,357 DEBUG   SenderThread:6703 [sender.py:send():179] send: summary
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:39,168 DEBUG   SenderThread:6703 [sender.py:send():179] send: stats
--2022-04-09 16:04:44,241 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:44,241 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:59,736 DEBUG   HandlerThread:6703 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:04:59,737 DEBUG   SenderThread:6703 [sender.py:send_request():193] send_request: stop_status
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log b/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-deleted file mode 100644
-index 0fbab81..0000000
---- a/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
-+++ /dev/null
-@@ -1,54 +0,0 @@
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/logs/debug-internal.log
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:09,151 INFO    MainThread:6703 [wandb_init.py:init():418] starting backend
--2022-04-09 16:04:09,156 INFO    MainThread:6703 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:04:09,157 INFO    MainThread:6703 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:04:09,158 INFO    wandb_internal:6703 [internal.py:wandb_internal():91] W&B internal server running at pid: 6703, started at: 2022-04-09 16:04:09.157143
--2022-04-09 16:04:09,159 INFO    MainThread:6703 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:04:09,162 INFO    MainThread:6703 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:04:09,163 INFO    WriterThread:6703 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:04:09,248 INFO    MainThread:6703 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files
--2022-04-09 16:04:10,116 INFO    SenderThread:6703 [sender.py:_start_run_threads():707] run started: 1qxpwcwj with start time 1649500449
--2022-04-09 16:04:10,118 INFO    MainThread:6703 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:04:10,119 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/requirements.txt
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/diff.patch
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code/train_translation.py
--2022-04-09 16:04:11,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/code
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:04:11,659 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:04:11,667 INFO    MainThread:6703 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:04:11,670 INFO    MainThread:6703 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:04:11,671 INFO    MainThread:6703 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:04:11,672 INFO    MainThread:6703 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/conda-environment.yaml
--2022-04-09 16:04:12,117 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-metadata.json
--2022-04-09 16:04:12,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:14,579 INFO    Thread-18 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2jyc5la6-diff.patch
--2022-04-09 16:04:15,118 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/config.yaml
--2022-04-09 16:04:16,480 INFO    Thread-14 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/a1u633fb-wandb-metadata.json
--2022-04-09 16:04:16,597 INFO    Thread-15 :6703 [upload_job.py:push():133] Uploaded file /tmp/tmp_q1pzmhpwandb/2s2yhxd4-code/train_translation.py
--2022-04-09 16:04:18,121 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:26,125 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:28,126 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:34,128 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,129 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:36,357 INFO    SenderThread:6703 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:04:37,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/wandb-summary.json
--2022-04-09 16:04:38,334 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
--2022-04-09 16:04:50,337 INFO    Thread-11 :6703 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160409-1qxpwcwj/files/output.log
-diff --git a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb b/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb
-deleted file mode 100644
-index 81c67b9..0000000
-Binary files a/wandb/run-20220409_160409-1qxpwcwj/run-1qxpwcwj.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py b/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml b/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/config.yaml b/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-deleted file mode 100644
-index 1ebd7db..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/diff.patch b/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-deleted file mode 100644
-index 9c4e2ae..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/diff.patch
-+++ /dev/null
-@@ -1,482 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2d0dffc 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,202 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..18dd535 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..b8703a2 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_160908-2097uoqw/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7af087b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_160908-2097uoqw
--\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/output.log b/wandb/run-20220409_160908-2097uoqw/files/output.log
-deleted file mode 100644
-index ed7c7b5..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py --load 0
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt b/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-deleted file mode 100644
-index 3cf53b0..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,27 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:39:09.049034",
--    "startedAt": "2022-04-09T10:39:08.174640",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json b/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-deleted file mode 100644
-index 225791e..0000000
---- a/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5264.9873046875, "_runtime": 162, "_timestamp": 1649500910, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log b/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-deleted file mode 100644
-index 1baf812..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
-+++ /dev/null
-@@ -1,1238 +0,0 @@
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,183 DEBUG   MainThread:7244 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 DEBUG   SenderThread:7244 [sender.py:send():179] send: header
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,187 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:09:08,187 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:08,556 DEBUG   SenderThread:7244 [sender.py:send():179] send: run
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:09,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():39] meta init
--2022-04-09 16:09:09,048 DEBUG   HandlerThread:7244 [meta.py:__init__():53] meta init done
--2022-04-09 16:09:09,049 DEBUG   HandlerThread:7244 [meta.py:probe():210] probe
--2022-04-09 16:09:09,055 DEBUG   HandlerThread:7244 [meta.py:_setup_git():200] setup git
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:09:09,071 DEBUG   HandlerThread:7244 [meta.py:_save_code():89] save code
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_code():110] save code done
--2022-04-09 16:09:09,078 DEBUG   HandlerThread:7244 [meta.py:_save_patches():127] save patches
--2022-04-09 16:09:09,148 DEBUG   HandlerThread:7244 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:09:09,149 DEBUG   HandlerThread:7244 [meta.py:_save_pip():57] save pip
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:09:09,150 DEBUG   HandlerThread:7244 [meta.py:_save_conda():78] save conda
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:09:10,558 DEBUG   HandlerThread:7244 [meta.py:probe():252] probe done
--2022-04-09 16:09:10,559 DEBUG   SenderThread:7244 [sender.py:send():179] send: files
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,033 DEBUG   SenderThread:7244 [sender.py:send():179] send: config
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:09:24,796 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:26,037 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:26,037 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:37,780 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:41,491 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:41,492 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:09:56,929 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:07,915 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:08,466 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:10:12,367 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:12,368 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:10:15,825 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:27,818 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:27,818 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:43,478 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:43,478 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:10:58,974 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,373 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:05,374 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:08,654 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:14,750 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:14,750 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:28,251 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:32,169 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:32,169 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:39,457 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:48,462 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:11:48,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: history
--2022-04-09 16:11:50,289 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:03,967 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:12:03,968 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
--2022-04-09 16:12:05,938 INFO    MainThread:7244 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 16:12:05,939 INFO    MainThread:7244 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:12:06,150 DEBUG   SenderThread:7244 [sender.py:send():179] send: telemetry
--2022-04-09 16:12:06,151 DEBUG   SenderThread:7244 [sender.py:send():179] send: exit
--2022-04-09 16:12:06,151 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 16:12:06,152 INFO    SenderThread:7244 [sender.py:send_exit():295] send defer
--2022-04-09 16:12:06,153 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:06,155 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,155 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 16:12:06,155 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:06,156 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 16:12:06,157 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 16:12:06,158 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,158 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 16:12:06,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 16:12:06,226 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send():179] send: stats
--2022-04-09 16:12:06,227 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,227 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 16:12:06,227 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 16:12:06,227 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 16:12:06,228 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,228 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send():179] send: summary
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:12:06,228 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 16:12:06,228 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 16:12:06,229 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:06,229 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 16:12:06,229 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:06,229 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 16:12:06,259 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:06,450 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:06,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:07,230 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 16:12:07,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,231 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,231 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 16:12:07,231 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 40095
--}
--
--2022-04-09 16:12:07,232 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 16:12:07,232 INFO    SenderThread:7244 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:12:07,333 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:07,451 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:07,453 INFO    SenderThread:7244 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt requirements.txt
--2022-04-09 16:12:07,454 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:12:07,455 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log output.log
--2022-04-09 16:12:07,456 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:12:07,457 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:12:07,467 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml config.yaml
--2022-04-09 16:12:07,468 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch diff.patch
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:12:07,507 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 16:12:07,508 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:07,510 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,510 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 16:12:07,510 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 40095
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:07,511 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:07,511 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 16:12:07,512 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:07,512 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 16:12:07,512 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:07,513 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 16:12:07,612 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,484 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 16:12:08,485 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,486 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,486 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 16:12:08,487 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 16:12:08,487 INFO    SenderThread:7244 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 16:12:08,487 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41552
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,489 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 16:12:08,489 DEBUG   SenderThread:7244 [sender.py:send():179] send: final
--2022-04-09 16:12:08,490 INFO    HandlerThread:7244 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send():179] send: footer
--2022-04-09 16:12:08,490 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: defer
--2022-04-09 16:12:08,490 INFO    SenderThread:7244 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 16:12:08,591 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,591 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,593 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,695 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,695 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,696 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,798 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,798 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,799 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:08,848 INFO    Thread-33 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:12:08,900 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:08,901 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:08,902 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 41657
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,004 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,005 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,006 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,108 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,109 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,110 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,212 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,213 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,214 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,316 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,317 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,318 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,420 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,421 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,422 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,524 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,525 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,526 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,628 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,629 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,630 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,732 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,733 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,734 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 42867
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,837 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,838 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,840 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:09,875 INFO    Thread-32 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:12:09,942 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:09,942 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:09,944 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,046 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,046 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,047 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,149 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,150 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,151 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,253 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,254 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,255 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,304 INFO    Thread-29 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:12:10,357 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,358 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,359 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,461 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,462 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,463 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,772 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,772 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,772 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,874 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,874 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,876 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:10,978 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:10,979 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:10,980 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,082 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,082 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,084 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,186 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,186 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,188 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,290 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,290 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,292 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,314 INFO    Thread-30 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:11,394 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,394 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,396 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,498 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,499 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,500 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,602 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,603 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,604 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,706 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,707 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,708 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,810 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,810 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,812 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:11,914 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:11,915 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:11,916 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,018 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,019 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,020 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,122 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,122 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,124 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,226 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,226 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,228 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,330 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,330 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,332 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,434 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,435 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,436 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,538 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,538 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,540 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,642 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,642 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,644 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,746 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,746 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,747 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,850 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,850 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,852 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:12,954 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:12,954 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:12,955 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,057 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,058 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,059 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,161 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,162 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,163 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,265 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,266 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,267 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,369 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,370 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,371 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,473 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,473 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,475 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,577 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,577 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,578 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,680 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,681 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,682 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,784 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,785 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,786 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,888 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,889 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,890 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:13,992 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:13,993 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:13,994 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,096 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,097 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,098 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,200 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,201 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,202 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,304 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,305 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,307 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,409 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,410 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,411 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,513 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,514 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,515 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,617 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,618 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,619 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,721 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,721 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,723 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,826 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,827 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,829 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:14,931 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:14,931 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:14,933 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,034 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,035 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,037 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,138 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,139 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,141 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,244 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,244 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,245 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,348 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,348 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,350 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,453 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,454 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,461 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,565 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,566 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,567 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,669 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,669 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,671 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,773 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,773 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,775 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,877 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,877 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,879 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:15,981 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:15,982 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:15,983 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 47441
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,085 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,086 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,087 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,189 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,190 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,191 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,293 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,294 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,295 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,397 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,398 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,399 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,501 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,502 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,503 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,605 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,606 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,607 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,709 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,710 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,711 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,813 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,814 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,816 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:16,918 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:16,919 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:16,920 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,022 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,023 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,024 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,126 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,127 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,128 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,230 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,230 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,232 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,334 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,335 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,336 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,374 INFO    Thread-31 :7244 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:12:17,438 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,438 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,440 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,542 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,543 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,544 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,646 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 16:12:17,647 DEBUG   SenderThread:7244 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 16:12:17,647 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:17,648 INFO    MainThread:7244 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 50723
--  total_bytes: 50723
--}
--
--2022-04-09 16:12:17,650 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 16:12:17,653 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 16:12:17,656 DEBUG   HandlerThread:7244 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 16:12:17,656 INFO    HandlerThread:7244 [handler.py:finish():638] shutting down handler
--2022-04-09 16:12:18,493 INFO    WriterThread:7244 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:12:18,647 INFO    SenderThread:7244 [sender.py:finish():933] shutting down sender
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:12:18,648 INFO    SenderThread:7244 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:12:18,661 INFO    MainThread:7244 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 16:12:18,662 INFO    MainThread:7244 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 16:12:18,663 INFO    MainThread:7244 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 16:12:18,709 INFO    MainThread:7244 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_160908-2097uoqw/logs/debug.log b/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-deleted file mode 100644
-index ad8f755..0000000
---- a/wandb/run-20220409_160908-2097uoqw/logs/debug.log
-+++ /dev/null
-@@ -1,77 +0,0 @@
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug.log
--2022-04-09 16:09:08,175 INFO    MainThread:7244 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/logs/debug-internal.log
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:08,176 INFO    MainThread:7244 [wandb_init.py:init():418] starting backend
--2022-04-09 16:09:08,180 INFO    MainThread:7244 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:09:08,181 INFO    wandb_internal:7244 [internal.py:wandb_internal():91] W&B internal server running at pid: 7244, started at: 2022-04-09 16:09:08.181261
--2022-04-09 16:09:08,182 INFO    MainThread:7244 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:09:08,183 INFO    MainThread:7244 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:09:08,186 INFO    MainThread:7244 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:09:08,186 INFO    WriterThread:7244 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:09:08,555 INFO    MainThread:7244 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files
--2022-04-09 16:09:09,044 INFO    SenderThread:7244 [sender.py:_start_run_threads():707] run started: 2097uoqw with start time 1649500748
--2022-04-09 16:09:09,045 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:09,046 INFO    MainThread:7244 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/diff.patch
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:10,046 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/requirements.txt
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code/train_translation.py
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:10,047 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/code
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:09:10,560 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:09:10,561 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:09:10,566 INFO    MainThread:7244 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:09:10,570 INFO    MainThread:7244 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:09:10,574 INFO    MainThread:7244 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:09:11,076 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/conda-environment.yaml
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-metadata.json
--2022-04-09 16:09:11,080 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:12,541 INFO    Thread-14 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/59p33rsf-wandb-metadata.json
--2022-04-09 16:09:12,542 INFO    Thread-22 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/1s3licml-diff.patch
--2022-04-09 16:09:12,543 INFO    Thread-17 :7244 [upload_job.py:push():133] Uploaded file /tmp/tmpaa9c8yvlwandb/g430jhga-code/train_translation.py
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/config.yaml
--2022-04-09 16:09:13,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:15,070 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:17,071 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:23,074 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:24,796 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:09:25,075 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:09:39,079 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:07,924 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:08,089 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:13,091 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:15,825 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:10:16,092 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:10:17,093 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:10:29,096 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:03,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,105 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:05,374 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:06,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:07,393 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:21,397 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:27,410 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:28,296 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:28,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:29,411 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:43,415 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:47,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:49,437 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:11:50,291 INFO    SenderThread:7244 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:11:50,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/wandb-summary.json
--2022-04-09 16:11:51,438 INFO    Thread-11 :7244 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_160908-2097uoqw/files/output.log
--2022-04-09 16:12:05,937 INFO    MainThread:7244 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/2097uoqw
-diff --git a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb b/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb
-deleted file mode 100644
-index b5995f1..0000000
-Binary files a/wandb/run-20220409_160908-2097uoqw/run-2097uoqw.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py b/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-deleted file mode 100644
-index 529add4..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
-+++ /dev/null
-@@ -1,380 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--        if args.rank == 0: 
--            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--            print('test_bleu_score', bleu_score)
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml b/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/config.yaml b/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/diff.patch b/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-deleted file mode 100644
-index aa6c773..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/diff.patch
-+++ /dev/null
-@@ -1,528 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..2aaecf9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,248 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..529add4 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,100 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+        if args.rank == 0: 
--+            bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+            print('test_bleu_score', bleu_score)
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..91bb884 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..252e468 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_161421-3t82t88x/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..c99b343 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_161421-3t82t88x
--\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/output.log b/wandb/run-20220409_161421-3t82t88x/files/output.log
-deleted file mode 100644
-index 3bf650b..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/output.log
-+++ /dev/null
-@@ -1,67 +0,0 @@
--
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Exception in thread Thread-15:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
--Exception: The wandb backend process has shutdown
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt b/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-deleted file mode 100644
-index f9df6f1..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:44:23.094487",
--    "startedAt": "2022-04-09T10:44:21.821617",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json b/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log b/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-deleted file mode 100644
-index 3f70132..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,831 DEBUG   MainThread:8815 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send():179] send: header
--2022-04-09 16:14:21,835 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:21,939 DEBUG   SenderThread:8815 [sender.py:send():179] send: run
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,090 DEBUG   SenderThread:8815 [sender.py:send():179] send: summary
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:23,092 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():39] meta init
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:__init__():53] meta init done
--2022-04-09 16:14:23,094 DEBUG   HandlerThread:8815 [meta.py:probe():210] probe
--2022-04-09 16:14:23,100 DEBUG   HandlerThread:8815 [meta.py:_setup_git():200] setup git
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:14:23,122 DEBUG   HandlerThread:8815 [meta.py:_save_code():89] save code
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_code():110] save code done
--2022-04-09 16:14:23,133 DEBUG   HandlerThread:8815 [meta.py:_save_patches():127] save patches
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:14:23,196 DEBUG   HandlerThread:8815 [meta.py:_save_pip():57] save pip
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:14:23,197 DEBUG   HandlerThread:8815 [meta.py:_save_conda():78] save conda
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,537 DEBUG   HandlerThread:8815 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:14:24,538 DEBUG   HandlerThread:8815 [meta.py:probe():252] probe done
--2022-04-09 16:14:24,539 DEBUG   SenderThread:8815 [sender.py:send():179] send: files
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,548 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:24,548 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:25,577 DEBUG   SenderThread:8815 [sender.py:send():179] send: config
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:40,579 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:40,579 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:14:51,743 DEBUG   SenderThread:8815 [sender.py:send():179] send: stats
--2022-04-09 16:14:56,424 DEBUG   HandlerThread:8815 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:14:56,424 DEBUG   SenderThread:8815 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:15:01,820 DEBUG   SenderThread:8815 [sender.py:send():179] send: history
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/logs/debug.log b/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-deleted file mode 100644
-index 99b6b97..0000000
---- a/wandb/run-20220409_161421-3t82t88x/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug.log
--2022-04-09 16:14:21,822 INFO    MainThread:8815 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/logs/debug-internal.log
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:21,823 INFO    MainThread:8815 [wandb_init.py:init():418] starting backend
--2022-04-09 16:14:21,828 INFO    MainThread:8815 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:14:21,829 INFO    wandb_internal:8815 [internal.py:wandb_internal():91] W&B internal server running at pid: 8815, started at: 2022-04-09 16:14:21.828726
--2022-04-09 16:14:21,829 INFO    MainThread:8815 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:14:21,830 INFO    MainThread:8815 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:14:21,834 INFO    MainThread:8815 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:14:21,835 INFO    WriterThread:8815 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:14:21,935 INFO    MainThread:8815 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:14:21,936 INFO    MainThread:8815 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:14:23,089 INFO    SenderThread:8815 [sender.py:_start_run_threads():707] run started: 3t82t88x with start time 1649501061
--2022-04-09 16:14:23,091 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:14:23,091 INFO    MainThread:8815 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py
--2022-04-09 16:14:24,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:14:24,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code
--2022-04-09 16:14:24,539 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:14:24,540 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:14:24,541 INFO    SenderThread:8815 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:14:24,547 INFO    MainThread:8815 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:14:24,551 INFO    MainThread:8815 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:14:24,552 INFO    MainThread:8815 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:14:24,553 INFO    MainThread:8815 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:14:25,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json
--2022-04-09 16:14:25,093 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:26,654 INFO    Thread-14 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1woflnrf-wandb-metadata.json
--2022-04-09 16:14:26,655 INFO    Thread-17 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/2g34m9v2-code/train_translation.py
--2022-04-09 16:14:27,090 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:27,669 INFO    Thread-18 :8815 [upload_job.py:push():133] Uploaded file /tmp/tmpyo0egpl2wandb/1gwzitp2-diff.patch
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:14:29,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:14:31,091 INFO    Thread-11 :8815 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:01,820 INFO    WriterThread:8815 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
--2022-04-09 16:15:01,820 INFO    SenderThread:8815 [sender.py:finish():933] shutting down sender
--2022-04-09 16:15:01,821 INFO    SenderThread:8815 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:15:02,097 INFO    SenderThread:8815 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files
--2022-04-09 16:15:02,098 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt requirements.txt
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:15:02,099 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log output.log
--2022-04-09 16:15:02,120 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:15:02,121 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:15:02,142 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml config.yaml
--2022-04-09 16:15:02,153 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/diff.patch diff.patch
--2022-04-09 16:15:02,165 INFO    SenderThread:8815 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:15:02,166 INFO    SenderThread:8815 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:15:04,027 INFO    Thread-25 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/output.log
--2022-04-09 16:15:04,029 INFO    Thread-27 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/wandb-summary.json
--2022-04-09 16:15:04,030 INFO    Thread-24 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/requirements.txt
--2022-04-09 16:15:04,034 INFO    Thread-26 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/conda-environment.yaml
--2022-04-09 16:15:04,036 INFO    Thread-28 :8815 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_161421-3t82t88x/files/config.yaml
--2022-04-09 16:15:05,015 ERROR   wandb_internal:8815 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 16:24:49,089 INFO    MainThread:8815 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 16:24:49,090 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,379 INFO    MainThread:8815 [wandb_run.py:_restore():1480] restore
--2022-04-09 16:24:49,381 INFO    MainThread:8815 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb b/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb
-deleted file mode 100644
-index a4486ce..0000000
-Binary files a/wandb/run-20220409_161421-3t82t88x/run-3t82t88x.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py b/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml b/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/config.yaml b/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-deleted file mode 100644
-index f0ae705..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 1
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/diff.patch b/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-deleted file mode 100644
-index 9eddab1..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/diff.patch
-+++ /dev/null
-@@ -1,560 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..353da1f 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,249 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..f0332eb 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..97853e9 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_162621-m83puhmm/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..7be71e2 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_162621-m83puhmm
--\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/output.log b/wandb/run-20220409_162621-m83puhmm/files/output.log
-deleted file mode 100644
-index ee1c9e3..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/output.log
-+++ /dev/null
-@@ -1,52 +0,0 @@
--
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--train_translation.py --load 0 --test_translation 1
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--test_bleu_score 0.0
--Exception in thread Thread-6:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--[34m[1mwandb[39m[22m: [32m[41mERROR[39m[49m Internal wandb error: file data was not synced
--Traceback (most recent call last):
--  File "<string>", line 1, in <module>
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
--    exitcode = _main(fd)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/spawn.py", line 118, in _main
--    return self._bootstrap()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/process.py", line 315, in _bootstrap
--    threading._shutdown()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 1307, in _shutdown
--    lock.acquire()
--KeyboardInterrupt
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt b/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-deleted file mode 100644
-index 4ce8f76..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,29 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T10:56:22.902051",
--    "startedAt": "2022-04-09T10:56:21.924771",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--load",
--        "0",
--        "--test_translation",
--        "1"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json b/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-deleted file mode 100644
-index 9e26dfe..0000000
---- a/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log b/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-deleted file mode 100644
-index 7032449..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
-+++ /dev/null
-@@ -1,107 +0,0 @@
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,934 DEBUG   MainThread:9280 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 16:26:21,937 DEBUG   SenderThread:9280 [sender.py:send():179] send: header
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:21,938 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: check_version
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,344 DEBUG   SenderThread:9280 [sender.py:send():179] send: run
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,889 DEBUG   SenderThread:9280 [sender.py:send():179] send: summary
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:22,895 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():39] meta init
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:__init__():53] meta init done
--2022-04-09 16:26:22,901 DEBUG   HandlerThread:9280 [meta.py:probe():210] probe
--2022-04-09 16:26:22,908 DEBUG   HandlerThread:9280 [meta.py:_setup_git():200] setup git
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_setup_git():207] setup git done
--2022-04-09 16:26:22,953 DEBUG   HandlerThread:9280 [meta.py:_save_code():89] save code
--2022-04-09 16:26:22,972 DEBUG   HandlerThread:9280 [meta.py:_save_code():110] save code done
--2022-04-09 16:26:22,973 DEBUG   HandlerThread:9280 [meta.py:_save_patches():127] save patches
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_patches():169] save patches done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():57] save pip
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_pip():71] save pip done
--2022-04-09 16:26:23,081 DEBUG   HandlerThread:9280 [meta.py:_save_conda():78] save conda
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:_save_conda():86] save conda done
--2022-04-09 16:26:24,438 DEBUG   HandlerThread:9280 [meta.py:probe():252] probe done
--2022-04-09 16:26:24,440 DEBUG   SenderThread:9280 [sender.py:send():179] send: files
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:24,448 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:24,898 DEBUG   SenderThread:9280 [sender.py:send():179] send: config
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:39,905 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:39,905 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:26:51,624 DEBUG   SenderThread:9280 [sender.py:send():179] send: stats
--2022-04-09 16:26:55,340 DEBUG   HandlerThread:9280 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 16:26:55,340 DEBUG   SenderThread:9280 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 16:27:06,912 DEBUG   SenderThread:9280 [sender.py:send():179] send: history
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/logs/debug.log b/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-deleted file mode 100644
-index 5053427..0000000
---- a/wandb/run-20220409_162621-m83puhmm/logs/debug.log
-+++ /dev/null
-@@ -1,85 +0,0 @@
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 16:26:21,925 INFO    MainThread:9280 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/logs/debug-internal.log
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():369] calling init triggers
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:21,926 INFO    MainThread:9280 [wandb_init.py:init():418] starting backend
--2022-04-09 16:26:21,931 INFO    MainThread:9280 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 16:26:21,932 INFO    wandb_internal:9280 [internal.py:wandb_internal():91] W&B internal server running at pid: 9280, started at: 2022-04-09 16:26:21.931687
--2022-04-09 16:26:21,932 INFO    MainThread:9280 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 16:26:21,934 INFO    MainThread:9280 [wandb_init.py:init():423] backend started and connected
--2022-04-09 16:26:21,936 INFO    MainThread:9280 [wandb_init.py:init():465] updated telemetry
--2022-04-09 16:26:21,937 INFO    MainThread:9280 [wandb_init.py:init():484] communicating current version
--2022-04-09 16:26:21,937 INFO    WriterThread:9280 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:26:22,343 INFO    MainThread:9280 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 16:26:22,344 INFO    MainThread:9280 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 16:26:22,884 INFO    SenderThread:9280 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:26:22,885 INFO    SenderThread:9280 [sender.py:_start_run_threads():707] run started: m83puhmm with start time 1649501781
--2022-04-09 16:26:22,890 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 16:26:22,893 INFO    MainThread:9280 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:26:23,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py
--2022-04-09 16:26:23,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code
--2022-04-09 16:26:24,440 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 16:26:24,441 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 16:26:24,442 INFO    SenderThread:9280 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 16:26:24,448 INFO    MainThread:9280 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 16:26:24,450 INFO    MainThread:9280 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 16:26:24,451 INFO    MainThread:9280 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 16:26:24,454 INFO    MainThread:9280 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 1, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 16:26:24,885 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json
--2022-04-09 16:26:24,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:25,823 INFO    Thread-17 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/xb2dntmc-code/train_translation.py
--2022-04-09 16:26:25,824 INFO    Thread-14 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/a41a1xzf-wandb-metadata.json
--2022-04-09 16:26:26,830 INFO    Thread-22 :9280 [upload_job.py:push():133] Uploaded file /tmp/tmpnmpgmtujwandb/3ttad6f8-diff.patch
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:26:26,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:28,886 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:26:30,887 INFO    Thread-11 :9280 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:06,912 INFO    SenderThread:9280 [sender.py:finish():933] shutting down sender
--2022-04-09 16:27:06,913 INFO    SenderThread:9280 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files
--2022-04-09 16:27:07,894 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt requirements.txt
--2022-04-09 16:27:07,895 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 16:27:07,896 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log output.log
--2022-04-09 16:27:07,903 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 16:27:07,904 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json wandb-summary.json
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml config.yaml
--2022-04-09 16:27:07,905 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/diff.patch diff.patch
--2022-04-09 16:27:07,908 INFO    SenderThread:9280 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/code/train_translation.py code/train_translation.py
--2022-04-09 16:27:07,909 INFO    SenderThread:9280 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 16:27:07,910 INFO    SenderThread:9280 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 16:27:07,912 INFO    WriterThread:9280 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
--2022-04-09 16:27:09,044 INFO    Thread-25 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/output.log
--2022-04-09 16:27:09,053 INFO    Thread-26 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/conda-environment.yaml
--2022-04-09 16:27:09,056 INFO    Thread-24 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/requirements.txt
--2022-04-09 16:27:09,061 INFO    Thread-27 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/wandb-summary.json
--2022-04-09 16:27:09,079 INFO    Thread-28 :9280 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_162621-m83puhmm/files/config.yaml
--2022-04-09 16:27:09,727 ERROR   wandb_internal:9280 [internal.py:wandb_internal():159] Thread HandlerThread:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 55, in run
--    self._run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/internal/internal_util.py", line 102, in _run
--    record = self._input_record_q.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
--2022-04-09 17:37:10,785 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,969 INFO    MainThread:9280 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:37:10,971 INFO    MainThread:9280 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb b/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb
-deleted file mode 100644
-index 978cbe5..0000000
-Binary files a/wandb/run-20220409_162621-m83puhmm/run-m83puhmm.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py b/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml b/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-deleted file mode 100644
-index 1988ff1..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 4
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 256
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 5
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 1
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 3
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 1
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch b/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-deleted file mode 100644
-index d503875..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
-+++ /dev/null
-@@ -1,561 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..b0966e9 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,250 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..1486dd6 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..071678f 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..be8b91a 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_173901-1dj6b5jf
--\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/output.log b/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-deleted file mode 100644
-index f4f17d5..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/output.log
-+++ /dev/null
-@@ -1,59 +0,0 @@
--
--train_translation.py
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--translation model saved in checkpoint
--{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--translation model saved in checkpoint
--{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--translation model saved in checkpoint
--{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt b/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-deleted file mode 100644
-index 6c00633..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,24 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:09:01.944494",
--    "startedAt": "2022-04-09T12:09:01.199712",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json b/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-deleted file mode 100644
-index c0804b4..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 5045.823547363281, "_runtime": 154, "_timestamp": 1649506295, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-deleted file mode 100644
-index 67f5897..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
-+++ /dev/null
-@@ -1,418 +0,0 @@
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,208 DEBUG   MainThread:10760 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send():179] send: header
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,212 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,337 DEBUG   SenderThread:10760 [sender.py:send():179] send: run
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:01,942 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():39] meta init
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:__init__():53] meta init done
--2022-04-09 17:39:01,944 DEBUG   HandlerThread:10760 [meta.py:probe():210] probe
--2022-04-09 17:39:01,950 DEBUG   HandlerThread:10760 [meta.py:_setup_git():200] setup git
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:39:01,967 DEBUG   HandlerThread:10760 [meta.py:_save_code():89] save code
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_code():110] save code done
--2022-04-09 17:39:01,975 DEBUG   HandlerThread:10760 [meta.py:_save_patches():127] save patches
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():57] save pip
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:39:02,020 DEBUG   HandlerThread:10760 [meta.py:_save_conda():78] save conda
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:39:03,360 DEBUG   HandlerThread:10760 [meta.py:probe():252] probe done
--2022-04-09 17:39:03,362 DEBUG   SenderThread:10760 [sender.py:send():179] send: files
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,372 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:03,372 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,822 DEBUG   SenderThread:10760 [sender.py:send():179] send: config
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:39:16,267 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:18,825 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:18,826 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:30,755 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:39:34,298 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:34,298 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:39:49,766 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:01,384 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:05,203 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:05,204 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:20,708 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,724 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:20,725 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,136 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:27,137 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:32,273 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:40:36,248 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:36,249 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:40:47,641 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:51,681 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:40:51,682 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:02,941 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,142 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:07,142 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:07,869 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:22,870 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:22,871 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:33,728 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: history
--2022-04-09 17:41:35,959 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,321 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:41:38,322 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:_restore():1480] restore
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: telemetry
--2022-04-09 17:41:51,002 DEBUG   SenderThread:10760 [sender.py:send():179] send: exit
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 17:41:51,003 INFO    SenderThread:10760 [sender.py:send_exit():295] send defer
--2022-04-09 17:41:51,004 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,005 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,006 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,006 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 17:41:51,007 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,008 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,008 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 17:41:51,009 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 17:41:51,009 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,010 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 17:41:51,062 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,062 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send():179] send: stats
--2022-04-09 17:41:51,063 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,063 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 17:41:51,063 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,063 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 17:41:51,064 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,064 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 17:41:51,064 DEBUG   SenderThread:10760 [sender.py:send():179] send: summary
--2022-04-09 17:41:51,064 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 17:41:51,065 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,065 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 17:41:51,065 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,065 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 17:41:51,109 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:51,203 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:51,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:51,546 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 17:41:51,546 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:51,546 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:51,546 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 44166
--}
--
--2022-04-09 17:41:51,546 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 17:41:51,547 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 17:41:51,547 INFO    SenderThread:10760 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 17:41:51,648 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,204 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:41:52,206 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt requirements.txt
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 17:41:52,207 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log output.log
--2022-04-09 17:41:52,208 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 17:41:52,209 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json wandb-summary.json
--2022-04-09 17:41:52,218 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml config.yaml
--2022-04-09 17:41:52,220 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch diff.patch
--2022-04-09 17:41:52,222 INFO    SenderThread:10760 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py code/train_translation.py
--2022-04-09 17:41:52,224 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 17:41:52,224 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 17:41:52,225 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:52,225 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 17:41:52,225 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,225 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 17:41:52,225 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,226 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,226 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 17:41:52,328 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,842 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 17:41:52,842 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,844 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,844 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 17:41:52,845 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:52,846 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 17:41:52,846 INFO    SenderThread:10760 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 17:41:52,848 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 17:41:52,848 DEBUG   SenderThread:10760 [sender.py:send():179] send: final
--2022-04-09 17:41:52,849 INFO    HandlerThread:10760 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 17:41:52,849 DEBUG   SenderThread:10760 [sender.py:send():179] send: footer
--2022-04-09 17:41:52,850 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: defer
--2022-04-09 17:41:52,850 INFO    SenderThread:10760 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 17:41:52,947 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:52,947 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:52,948 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 44166
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,049 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,050 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,051 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 45730
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,153 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,153 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,155 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,256 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,257 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,258 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,360 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,361 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,362 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,464 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,465 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,466 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,502 INFO    Thread-33 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:41:53,504 INFO    Thread-29 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:41:53,512 INFO    Thread-32 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:53,524 INFO    Thread-31 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:41:53,568 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,568 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,569 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,671 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,672 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,673 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,775 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,776 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,777 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,879 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,879 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,881 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:53,983 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:53,983 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:53,984 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,033 INFO    Thread-30 :10760 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:54,086 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,087 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,088 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,190 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,190 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,192 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,294 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 17:41:54,294 DEBUG   SenderThread:10760 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 17:41:54,294 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:54,295 INFO    MainThread:10760 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 54768
--  total_bytes: 54768
--}
--
--2022-04-09 17:41:54,297 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 17:41:54,299 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 17:41:54,302 DEBUG   HandlerThread:10760 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 17:41:54,302 INFO    HandlerThread:10760 [handler.py:finish():638] shutting down handler
--2022-04-09 17:41:54,849 INFO    WriterThread:10760 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [sender.py:finish():933] shutting down sender
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 17:41:55,295 INFO    SenderThread:10760 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 17:41:55,308 INFO    MainThread:10760 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 17:41:55,309 INFO    MainThread:10760 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 17:41:55,310 INFO    MainThread:10760 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 17:41:55,323 INFO    MainThread:10760 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log b/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-deleted file mode 100644
-index 2ea4289..0000000
---- a/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
-+++ /dev/null
-@@ -1,73 +0,0 @@
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug.log
--2022-04-09 17:39:01,200 INFO    MainThread:10760 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/logs/debug-internal.log
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
--config: {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:01,201 INFO    MainThread:10760 [wandb_init.py:init():418] starting backend
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:39:01,206 INFO    MainThread:10760 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:39:01,207 INFO    wandb_internal:10760 [internal.py:wandb_internal():91] W&B internal server running at pid: 10760, started at: 2022-04-09 17:39:01.206592
--2022-04-09 17:39:01,208 INFO    MainThread:10760 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:39:01,211 INFO    MainThread:10760 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:39:01,212 INFO    WriterThread:10760 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:39:01,333 INFO    MainThread:10760 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files
--2022-04-09 17:39:01,939 INFO    SenderThread:10760 [sender.py:_start_run_threads():707] run started: 1dj6b5jf with start time 1649506141
--2022-04-09 17:39:01,941 INFO    MainThread:10760 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:39:01,941 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/requirements.txt
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code/train_translation.py
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/diff.patch
--2022-04-09 17:39:02,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/code
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:39:03,362 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:39:03,363 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:39:03,372 INFO    MainThread:10760 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:39:03,374 INFO    MainThread:10760 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:39:03,375 INFO    MainThread:10760 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:39:03,376 INFO    MainThread:10760 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 5, 'batch_size': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 256, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 1, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 1}
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/conda-environment.yaml
--2022-04-09 17:39:03,940 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-metadata.json
--2022-04-09 17:39:03,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:04,556 INFO    Thread-14 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/2bsevvzq-wandb-metadata.json
--2022-04-09 17:39:04,570 INFO    Thread-15 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/229pqnc8-code/train_translation.py
--2022-04-09 17:39:05,340 INFO    Thread-17 :10760 [upload_job.py:push():133] Uploaded file /tmp/tmpfwfmk75uwandb/1kcug5yp-diff.patch
--2022-04-09 17:39:05,941 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/config.yaml
--2022-04-09 17:39:05,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:07,942 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:09,943 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:15,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:16,268 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:39:16,945 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:39:17,946 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:39:29,954 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:20,709 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:20,973 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:27,137 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:28,142 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:44,154 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:47,642 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:40:48,158 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:40:50,160 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:04,169 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:07,869 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:08,170 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:10,171 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:32,187 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:35,960 INFO    SenderThread:10760 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/wandb-summary.json
--2022-04-09 17:41:36,192 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:38,194 INFO    Thread-11 :10760 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_173901-1dj6b5jf/files/output.log
--2022-04-09 17:41:50,823 INFO    MainThread:10760 [wandb_run.py:finish():1208] finishing run tera_squid/translation_test/1dj6b5jf
-diff --git a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb b/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb
-deleted file mode 100644
-index c939775..0000000
-Binary files a/wandb/run-20220409_173901-1dj6b5jf/run-1dj6b5jf.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py b/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml b/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/config.yaml b/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-deleted file mode 100644
-index 0b2ef04..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 128
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 24
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/diff.patch b/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-deleted file mode 100644
-index a6f8b6d..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/diff.patch
-+++ /dev/null
-@@ -1,634 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e11eb21 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,302 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..a3e7597 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..453b7bc 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_175151-z44hpswp/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..b2d6ded 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_175151-z44hpswp
--\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/output.log b/wandb/run-20220409_175151-z44hpswp/files/output.log
-deleted file mode 100644
-index 2224687..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/output.log
-+++ /dev/null
-@@ -1,48 +0,0 @@
--
--train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--translation model saved in checkpoint
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--translation model saved in checkpoint
--translation model saved in checkpoint
--{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--translation model saved in checkpoint
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt b/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-deleted file mode 100644
-index e3bc5e0..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:21:52.829321",
--    "startedAt": "2022-04-09T12:21:51.786614",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=128",
--        "--dfeedforward=1024",
--        "--epochs=24",
--        "--nhead=4",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json b/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-deleted file mode 100644
-index 4d8b4c3..0000000
---- a/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 107.22583770751953, "_runtime": 695, "_timestamp": 1649507606, "_step": 28, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log b/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-deleted file mode 100644
-index 552d2f2..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
-+++ /dev/null
-@@ -1,620 +0,0 @@
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,796 DEBUG   MainThread:14720 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send():179] send: header
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:51,800 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: check_version
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,171 DEBUG   SenderThread:14720 [sender.py:send():179] send: run
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,825 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:52,827 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():39] meta init
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:__init__():53] meta init done
--2022-04-09 17:51:52,829 DEBUG   HandlerThread:14720 [meta.py:probe():210] probe
--2022-04-09 17:51:52,837 DEBUG   HandlerThread:14720 [meta.py:_setup_git():200] setup git
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_setup_git():207] setup git done
--2022-04-09 17:51:52,869 DEBUG   HandlerThread:14720 [meta.py:_save_code():89] save code
--2022-04-09 17:51:52,876 DEBUG   HandlerThread:14720 [meta.py:_save_code():110] save code done
--2022-04-09 17:51:52,877 DEBUG   HandlerThread:14720 [meta.py:_save_patches():127] save patches
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_patches():169] save patches done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():57] save pip
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_pip():71] save pip done
--2022-04-09 17:51:52,928 DEBUG   HandlerThread:14720 [meta.py:_save_conda():78] save conda
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:_save_conda():86] save conda done
--2022-04-09 17:51:54,259 DEBUG   HandlerThread:14720 [meta.py:probe():252] probe done
--2022-04-09 17:51:54,261 DEBUG   SenderThread:14720 [sender.py:send():179] send: files
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,272 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:51:54,272 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: config
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:06,575 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:09,721 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:09,721 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:21,569 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:25,148 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:25,149 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:40,576 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:49,874 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:52,213 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:52:55,651 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,140 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:52:56,140 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:11,596 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:11,597 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:14,741 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:23,054 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:27,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:27,074 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:38,173 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:42,499 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:42,500 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:53,596 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:57,929 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:53:57,929 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:53:59,413 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:53:59,414 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:13,359 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:13,359 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,344 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:54:20,345 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:24,527 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:28,793 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:28,793 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:44,227 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:44,227 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:54:55,062 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:54:59,653 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:54:59,653 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:11,338 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:11,339 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:15,098 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:15,099 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:17,278 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:25,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:30,519 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:30,519 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:55:37,281 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:45,955 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:55:45,956 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:55:56,468 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:01,086 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:01,589 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:17,078 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:17,078 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:23,379 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:27,343 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:56:32,522 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:32,522 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:56:46,540 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:47,961 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:56:47,961 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:56:57,925 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:03,390 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:03,390 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:18,853 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:18,853 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:28,552 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:57:34,280 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:34,280 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:39,211 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:57:45,145 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:49,734 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:57:49,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:57:59,325 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,341 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:05,342 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:05,789 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:20,790 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:20,790 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:29,955 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:30,176 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:36,214 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:36,214 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:51,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:58:51,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:58:52,209 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:00,845 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:07,147 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:07,147 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 17:59:13,797 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:22,588 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:22,588 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:31,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:38,008 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:38,008 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 17:59:53,449 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 17:59:53,450 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:02,140 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:07,706 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:08,884 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:08,884 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:13,617 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:13,618 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:24,366 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:24,367 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:32,786 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:00:36,584 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:39,806 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:39,806 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,224 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:00:55,225 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,715 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:00,716 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:03,610 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:10,649 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:10,649 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:22,153 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:26,073 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:26,073 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:34,217 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:41,491 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:41,492 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,993 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:01:43,994 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:56,918 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:01:56,918 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:04,763 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:12,340 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:12,340 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:27,774 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:35,408 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:38,748 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:43,201 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:43,201 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:44,434 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:02:44,435 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:58,647 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:02:58,647 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:03,720 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:06,291 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:14,117 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:14,117 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,051 DEBUG   SenderThread:14720 [sender.py:send():179] send: history
--2022-04-09 18:03:26,052 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:29,557 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:29,559 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:36,939 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:03:42,324 INFO    MainThread:14720 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:03:43,079 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:43,080 DEBUG   SenderThread:14720 [sender.py:send():179] send: telemetry
--2022-04-09 18:03:43,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:43,580 DEBUG   SenderThread:14720 [sender.py:send():179] send: exit
--2022-04-09 18:03:43,580 INFO    SenderThread:14720 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:03:43,581 INFO    SenderThread:14720 [sender.py:send_exit():295] send defer
--2022-04-09 18:03:43,581 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:43,582 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,583 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:03:43,583 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:03:43,584 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:03:43,584 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 48639
--}
--
--2022-04-09 18:03:43,585 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,586 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:03:43,657 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,657 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send():179] send: stats
--2022-04-09 18:03:43,658 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,658 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:03:43,658 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:03:43,658 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send():179] send: summary
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:43,659 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:03:43,659 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:03:43,659 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,659 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:03:43,660 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:43,660 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:03:43,660 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:03:43,660 INFO    SenderThread:14720 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:03:43,686 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:44,248 INFO    SenderThread:14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt requirements.txt
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log output.log
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml config.yaml
--2022-04-09 18:03:44,249 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch diff.patch
--2022-04-09 18:03:44,251 INFO    SenderThread:14720 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:03:44,253 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:03:44,253 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,254 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,258 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:03:44,260 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,260 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:44,260 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:03:44,261 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,261 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:03:44,261 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,261 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:03:44,361 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:44,907 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:03:44,908 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:44,908 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,908 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:03:44,909 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 48639
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:44,909 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:03:44,909 INFO    SenderThread:14720 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:03:44,910 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:03:44,910 INFO    HandlerThread:14720 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: final
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send():179] send: footer
--2022-04-09 18:03:44,911 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:03:44,911 INFO    SenderThread:14720 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:03:45,010 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,011 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,012 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,115 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,116 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,117 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,219 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,219 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,221 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,323 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,323 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,325 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,427 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,427 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,428 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,466 INFO    Thread-54 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 18:03:45,472 INFO    Thread-52 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 18:03:45,476 INFO    Thread-53 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:45,530 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,531 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,532 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,634 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,635 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,636 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,738 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,739 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,740 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,842 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,842 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,844 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:45,946 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:45,946 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:45,948 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,050 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,051 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,053 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,155 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,156 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,157 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,184 INFO    Thread-56 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 18:03:46,188 INFO    Thread-55 :14720 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:46,259 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,259 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,261 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,363 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,364 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,365 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,468 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:03:46,469 DEBUG   SenderThread:14720 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:03:46,469 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:46,470 INFO    MainThread:14720 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 58315
--  total_bytes: 58315
--}
--
--2022-04-09 18:03:46,472 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:03:46,474 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:03:46,477 DEBUG   HandlerThread:14720 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:03:46,478 INFO    HandlerThread:14720 [handler.py:finish():638] shutting down handler
--2022-04-09 18:03:46,911 INFO    WriterThread:14720 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 18:03:47,469 INFO    SenderThread:14720 [sender.py:finish():933] shutting down sender
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:03:47,470 INFO    SenderThread:14720 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:03:47,483 INFO    MainThread:14720 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:03:47,484 INFO    MainThread:14720 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:03:47,485 INFO    MainThread:14720 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:03:47,525 INFO    MainThread:14720 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_175151-z44hpswp/logs/debug.log b/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-deleted file mode 100644
-index bb769fe..0000000
---- a/wandb/run-20220409_175151-z44hpswp/logs/debug.log
-+++ /dev/null
-@@ -1,140 +0,0 @@
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'z44hpswp', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-z44hpswp.yaml', 'start_method': 'thread'}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug.log
--2022-04-09 17:51:51,787 INFO    MainThread:14720 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/logs/debug-internal.log
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():369] calling init triggers
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 24, 'batch_size': 128, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:51,788 INFO    MainThread:14720 [wandb_init.py:init():418] starting backend
--2022-04-09 17:51:51,793 INFO    MainThread:14720 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 17:51:51,794 INFO    wandb_internal:14720 [internal.py:wandb_internal():91] W&B internal server running at pid: 14720, started at: 2022-04-09 17:51:51.793927
--2022-04-09 17:51:51,795 INFO    MainThread:14720 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 17:51:51,796 INFO    MainThread:14720 [wandb_init.py:init():423] backend started and connected
--2022-04-09 17:51:51,797 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 128, 'dfeedforward': 1024, 'epochs': 24, 'nhead': 4, 'nlayers': 4}
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():465] updated telemetry
--2022-04-09 17:51:51,799 INFO    MainThread:14720 [wandb_init.py:init():484] communicating current version
--2022-04-09 17:51:51,800 INFO    WriterThread:14720 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
--2022-04-09 17:51:52,170 INFO    MainThread:14720 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 17:51:52,171 INFO    MainThread:14720 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files
--2022-04-09 17:51:52,824 INFO    SenderThread:14720 [sender.py:_start_run_threads():707] run started: z44hpswp with start time 1649506911
--2022-04-09 17:51:52,826 INFO    MainThread:14720 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 17:51:52,826 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:51:53,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/diff.patch
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code/train_translation.py
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/requirements.txt
--2022-04-09 17:51:53,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/code
--2022-04-09 17:51:54,261 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 17:51:54,262 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 17:51:54,263 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 17:51:54,272 INFO    MainThread:14720 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 17:51:54,274 INFO    MainThread:14720 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 17:51:54,276 INFO    MainThread:14720 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/conda-environment.yaml
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-metadata.json
--2022-04-09 17:51:54,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:56,133 INFO    Thread-15 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2ih8faqi-code/train_translation.py
--2022-04-09 17:51:56,134 INFO    Thread-14 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/hxttd0im-wandb-metadata.json
--2022-04-09 17:51:56,135 INFO    Thread-16 :14720 [upload_job.py:push():133] Uploaded file /tmp/tmp1e33tdlewandb/2f1e53ks-diff.patch
--2022-04-09 17:51:56,825 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/config.yaml
--2022-04-09 17:51:56,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:51:58,826 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:00,827 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:06,575 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:07,050 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:21,053 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:52:49,877 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:50,064 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:52:55,651 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:52:56,142 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:11,146 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:14,742 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:15,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:17,233 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:35,238 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:38,173 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:53:38,239 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:53:55,247 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:53:59,416 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:00,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:17,258 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:20,346 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:54:21,261 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:54:39,266 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:11,339 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:12,278 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:17,280 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:17,281 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:33,287 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:37,282 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:55:37,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:55:39,290 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:55:57,307 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:01,089 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:01,591 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:19,597 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:23,382 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:23,878 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:56:43,960 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:46,541 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:56:47,040 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:06,045 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:57:39,211 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:40,057 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:57:45,145 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:57:46,061 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:02,065 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:05,790 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:06,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:07,248 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:25,253 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:30,177 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:30,255 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:58:47,288 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:58:52,210 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:58:52,289 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:09,294 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:13,798 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 17:59:14,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 17:59:15,296 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 17:59:33,301 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:07,707 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:08,314 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:13,618 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:14,317 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:31,321 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:36,585 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:00:37,323 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:00:37,324 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:00:55,328 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:00,716 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:01,330 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:17,334 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:22,153 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:22,653 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:01:39,657 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:01:43,994 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:01:44,659 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:03,664 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:02:38,749 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:39,680 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:44,435 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:02:44,933 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:02:59,938 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:03,721 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:04,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:06,221 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:22,227 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/output.log
--2022-04-09 18:03:26,052 INFO    SenderThread:14720 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:26,231 INFO    Thread-11 :14720 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_175151-z44hpswp/files/wandb-summary.json
--2022-04-09 18:03:42,322 INFO    MainThread:14720 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/z44hpswp
-diff --git a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb b/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb
-deleted file mode 100644
-index 55f1aff..0000000
-Binary files a/wandb/run-20220409_175151-z44hpswp/run-z44hpswp.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py b/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml b/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml b/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-deleted file mode 100644
-index 194d831..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
-+++ /dev/null
-@@ -1,109 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      2:
--      - 1
--      - 11
--      3:
--      - 2
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 40
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 4
--nlayers:
--  desc: null
--  value: 6
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch b/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-deleted file mode 100644
-index 979dcc5..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
-+++ /dev/null
-@@ -1,645 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..42fbde8 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,313 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..371ace5 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..a6d9884 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..705068b 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_180353-vjrenr4z
--\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/output.log b/wandb/run-20220409_180353-vjrenr4z/files/output.log
-deleted file mode 100644
-index a2bf91c..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/output.log
-+++ /dev/null
-@@ -1,102 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--translation model saved in checkpoint
--{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--translation model saved in checkpoint
--{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--translation model saved in checkpoint
--{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--translation model saved in checkpoint
--{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--translation model saved in checkpoint
--{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--translation model saved in checkpoint
--{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--translation model saved in checkpoint
--{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--translation model saved in checkpoint
--{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--translation model saved in checkpoint
--{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--translation model saved in checkpoint
--{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--translation model saved in checkpoint
--{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--translation model saved in checkpoint
--{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--translation model saved in checkpoint
--{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--translation model saved in checkpoint
--{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--translation model saved in checkpoint
--{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--translation model saved in checkpoint
--{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--translation model saved in checkpoint
--{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--translation model saved in checkpoint
--{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--translation model saved in checkpoint
--{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--translation model saved in checkpoint
--{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--translation model saved in checkpoint
--{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--translation model saved in checkpoint
--{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--translation model saved in checkpoint
--{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--translation model saved in checkpoint
--{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--translation model saved in checkpoint
--{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--translation model saved in checkpoint
--{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--translation model saved in checkpoint
--{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--translation model saved in checkpoint
--{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--translation model saved in checkpoint
--{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--translation model saved in checkpoint
--{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--translation model saved in checkpoint
--{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--translation model saved in checkpoint
--{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--translation model saved in checkpoint
--{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--translation model saved in checkpoint
--{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--translation model saved in checkpoint
--{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--translation model saved in checkpoint
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt b/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-deleted file mode 100644
-index 3e24107..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:33:55.138080",
--    "startedAt": "2022-04-09T12:33:53.912960",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=40",
--        "--nhead=4",
--        "--nlayers=6"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json b/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-deleted file mode 100644
-index dbd5bb9..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 571.8498382568359, "_runtime": 1394, "_timestamp": 1649509027, "_step": 47, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-deleted file mode 100644
-index 6ac5722..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
-+++ /dev/null
-@@ -1,809 +0,0 @@
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,947 DEBUG   MainThread:18842 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 DEBUG   SenderThread:18842 [sender.py:send():179] send: header
--2022-04-09 18:03:53,957 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:53,958 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:54,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: run
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:55,130 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():39] meta init
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:__init__():53] meta init done
--2022-04-09 18:03:55,137 DEBUG   HandlerThread:18842 [meta.py:probe():210] probe
--2022-04-09 18:03:55,146 DEBUG   HandlerThread:18842 [meta.py:_setup_git():200] setup git
--2022-04-09 18:03:55,213 DEBUG   HandlerThread:18842 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:03:55,214 DEBUG   HandlerThread:18842 [meta.py:_save_code():89] save code
--2022-04-09 18:03:55,241 DEBUG   HandlerThread:18842 [meta.py:_save_code():110] save code done
--2022-04-09 18:03:55,242 DEBUG   HandlerThread:18842 [meta.py:_save_patches():127] save patches
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():57] save pip
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:03:55,334 DEBUG   HandlerThread:18842 [meta.py:_save_conda():78] save conda
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,710 DEBUG   HandlerThread:18842 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:03:56,711 DEBUG   HandlerThread:18842 [meta.py:probe():252] probe done
--2022-04-09 18:03:56,713 DEBUG   SenderThread:18842 [sender.py:send():179] send: files
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,723 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:03:56,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: config
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:04:09,890 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:12,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:12,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:23,959 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:27,637 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:27,637 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:43,070 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:43,071 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:04:54,578 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:04:58,609 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:04:58,609 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:13,418 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,096 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:14,096 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:19,610 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:25,318 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:05:29,536 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:29,536 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,041 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:05:45,042 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:05:45,711 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:55,878 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:00,385 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:00,385 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,115 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:12,116 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:15,812 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:15,812 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:26,509 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:06:31,252 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:31,252 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:06:39,204 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:46,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:06:46,699 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:06:57,088 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:02,128 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:02,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:07:07,189 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:17,560 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:17,560 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:27,788 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:33,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:33,039 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:07:48,472 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:07:58,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:03,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:03,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:10,495 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:10,496 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,773 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:16,774 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:19,358 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:19,358 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:29,127 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:08:34,827 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:34,827 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:08:43,393 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:50,258 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:08:50,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:08:59,791 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:05,625 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:05,625 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:09,196 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:21,079 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:21,079 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:30,544 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:36,425 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:36,426 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,629 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:09:37,630 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:51,758 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:09:51,758 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:01,192 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:10:06,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,213 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:07,213 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:22,576 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:22,576 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,752 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:10:37,928 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:37,928 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:10:53,268 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:02,406 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:08,610 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:08,610 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:12,361 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:18,663 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:23,966 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:23,966 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:33,001 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:39,600 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:39,600 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:11:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:54,944 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:11:54,944 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:03,627 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:10,280 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:10,280 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:12,130 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:25,635 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:25,635 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:34,297 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:12:36,014 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:40,989 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:40,989 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:56,322 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:12:56,323 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:00,307 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:05,226 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:11,687 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:11,687 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:27,035 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:27,035 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:35,749 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:13:42,474 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:42,475 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:13:57,111 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:13:57,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:03,217 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:06,507 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:13,240 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:13,240 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,985 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:26,986 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:28,667 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:28,668 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:37,148 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:14:44,310 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:44,310 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:14:53,107 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:59,666 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:14:59,666 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:07,695 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:14,998 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:14,998 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:17,525 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:30,334 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:30,334 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:38,429 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:15:44,460 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:45,673 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:15:45,673 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:01,020 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:01,020 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:09,031 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:16,349 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:16,349 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:31,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:39,689 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:46,381 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:47,261 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:16:47,261 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:16:52,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:02,605 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:02,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:10,351 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:16,742 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:17,935 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:17,935 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:33,308 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:33,308 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,998 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:17:44,097 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:17:44,098 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:48,657 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:17:48,817 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:04,733 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:10,263 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:11,869 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:20,065 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:20,065 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:35,442 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,258 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:18:42,271 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:50,780 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:18:50,780 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:06,176 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:06,176 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:12,884 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:21,533 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:21,533 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:36,872 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:41,320 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:43,542 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:19:47,487 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:52,222 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:19:52,222 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:07,575 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:07,575 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:11,295 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:14,395 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:22,919 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:22,920 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:38,284 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:38,284 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:20:39,161 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:44,947 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:20:53,719 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:20:53,719 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:05,165 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:09,154 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:09,154 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:15,554 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:24,513 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:24,513 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,048 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:21:32,049 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:39,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:39,921 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:21:46,176 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:55,292 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:21:55,292 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:10,678 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:10,679 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:16,761 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:26,337 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:26,337 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:37,631 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:41,696 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:41,696 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:22:43,842 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:22:43,843 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:47,574 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:22:57,038 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:22:57,038 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:06,284 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:12,473 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:12,473 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:18,151 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:27,820 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:27,820 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:23:37,389 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:43,266 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:43,266 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:48,907 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:23:58,729 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:23:58,729 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,447 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:03,448 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:14,167 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:14,167 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:19,591 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:29,519 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:29,520 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:24:31,880 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:44,877 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:24:44,877 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:24:50,128 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:00,259 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:00,259 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:15,606 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:20,792 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:30,948 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:30,948 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:32,468 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,976 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:25:38,977 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:46,374 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:25:46,374 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:25:51,548 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:01,722 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:01,723 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:03,261 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:03,262 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:17,072 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:17,072 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:22,124 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:26:32,410 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:32,411 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:26:38,163 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:47,810 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:26:47,810 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:26:52,753 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,241 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:03,241 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: history
--2022-04-09 18:27:07,299 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:18,699 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:18,700 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:23,342 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:34,106 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:34,107 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
--2022-04-09 18:27:39,696 INFO    MainThread:18842 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 0
--2022-04-09 18:27:39,697 INFO    MainThread:18842 [wandb_run.py:_restore():1480] restore
--2022-04-09 18:27:40,003 DEBUG   SenderThread:18842 [sender.py:send():179] send: telemetry
--2022-04-09 18:27:40,004 DEBUG   SenderThread:18842 [sender.py:send():179] send: exit
--2022-04-09 18:27:40,005 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,005 INFO    SenderThread:18842 [sender.py:send_exit():287] handling exit code: 0
--2022-04-09 18:27:40,006 INFO    SenderThread:18842 [sender.py:send_exit():295] send defer
--2022-04-09 18:27:40,006 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,008 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,008 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 0
--2022-04-09 18:27:40,008 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,010 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 0
--2022-04-09 18:27:40,010 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 1
--2022-04-09 18:27:40,011 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,011 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 1
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 1
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send():179] send: stats
--2022-04-09 18:27:40,067 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,067 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 2
--2022-04-09 18:27:40,067 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,067 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 2
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 3
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 3
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send():179] send: summary
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:40,068 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 3
--2022-04-09 18:27:40,068 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 4
--2022-04-09 18:27:40,068 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,068 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 4
--2022-04-09 18:27:40,069 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,069 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 4
--2022-04-09 18:27:40,110 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:40,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:40,461 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 5
--2022-04-09 18:27:40,462 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:40,463 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:40,464 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 5
--2022-04-09 18:27:40,464 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 2
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 49395
--}
--
--2022-04-09 18:27:40,465 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:40,465 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 5
--2022-04-09 18:27:40,466 INFO    SenderThread:18842 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:27:40,566 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,201 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:41,202 INFO    SenderThread:18842 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:27:41,205 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt requirements.txt
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log output.log
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:27:41,206 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:27:41,207 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml config.yaml
--2022-04-09 18:27:41,211 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch diff.patch
--2022-04-09 18:27:41,220 INFO    SenderThread:18842 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:27:41,223 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 6
--2022-04-09 18:27:41,224 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,225 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,225 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 6
--2022-04-09 18:27:41,225 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 49395
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,226 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,226 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 6
--2022-04-09 18:27:41,230 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:41,231 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 7
--2022-04-09 18:27:41,232 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,232 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 7
--2022-04-09 18:27:41,232 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,232 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 7
--2022-04-09 18:27:41,332 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:41,915 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 8
--2022-04-09 18:27:41,915 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:41,917 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,917 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 8
--2022-04-09 18:27:41,918 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:41,919 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 8
--2022-04-09 18:27:41,919 INFO    SenderThread:18842 [sender.py:send_request_defer():342] send defer: 9
--2022-04-09 18:27:41,921 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: defer
--2022-04-09 18:27:41,921 INFO    HandlerThread:18842 [handler.py:handle_request_defer():141] handle defer: 9
--2022-04-09 18:27:41,921 DEBUG   SenderThread:18842 [sender.py:send():179] send: final
--2022-04-09 18:27:41,922 DEBUG   SenderThread:18842 [sender.py:send():179] send: footer
--2022-04-09 18:27:41,923 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: defer
--2022-04-09 18:27:41,923 INFO    SenderThread:18842 [sender.py:send_request_defer():304] handle sender defer: 9
--2022-04-09 18:27:42,024 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,024 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,025 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,127 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,128 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,129 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,231 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,231 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,233 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,335 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,335 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,336 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,438 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,439 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,440 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,542 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,542 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,544 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,592 INFO    Thread-73 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:27:42,594 INFO    Thread-71 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:27:42,599 INFO    Thread-75 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:27:42,601 INFO    Thread-72 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:42,602 INFO    Thread-74 :18842 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:42,645 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,645 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,646 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,747 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,748 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,749 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,851 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: poll_exit
--2022-04-09 18:27:42,851 DEBUG   SenderThread:18842 [sender.py:send_request():193] send_request: poll_exit
--2022-04-09 18:27:42,852 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:42,853 INFO    MainThread:18842 [wandb_run.py:_wait_for_finish():1630] got exit ret: done: true
--exit_result {
--}
--file_counts {
--  wandb_count: 7
--  other_count: 1
--}
--pusher_stats {
--  uploaded_bytes: 62216
--  total_bytes: 62216
--}
--
--2022-04-09 18:27:42,855 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: get_summary
--2022-04-09 18:27:42,857 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: sampled_history
--2022-04-09 18:27:42,860 DEBUG   HandlerThread:18842 [handler.py:handle_request():124] handle_request: shutdown
--2022-04-09 18:27:42,861 INFO    HandlerThread:18842 [handler.py:finish():638] shutting down handler
--2022-04-09 18:27:42,922 INFO    WriterThread:18842 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:27:43,852 INFO    SenderThread:18842 [sender.py:finish():933] shutting down sender
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:27:43,853 INFO    SenderThread:18842 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_summary():1785] rendering summary
--2022-04-09 18:27:43,866 INFO    MainThread:18842 [wandb_run.py:_show_history():1823] rendering history
--2022-04-09 18:27:43,868 INFO    MainThread:18842 [wandb_run.py:_show_files():1852] logging synced files
--2022-04-09 18:27:43,884 INFO    MainThread:18842 [internal.py:handle_exit():78] Internal process exited
-diff --git a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log b/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-deleted file mode 100644
-index 55b000f..0000000
---- a/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
-+++ /dev/null
-@@ -1,230 +0,0 @@
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'vjrenr4z', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml', 'start_method': 'thread'}
--2022-04-09 18:03:53,918 INFO    MainThread:18842 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug.log
--2022-04-09 18:03:53,919 INFO    MainThread:18842 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/logs/debug-internal.log
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:03:53,920 INFO    MainThread:18842 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--config: {'workers': 4, 'epochs': 40, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 1024, 'nlayers': 6, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:53,921 INFO    MainThread:18842 [wandb_init.py:init():418] starting backend
--2022-04-09 18:03:53,941 INFO    MainThread:18842 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:03:53,943 INFO    MainThread:18842 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:03:53,945 INFO    wandb_internal:18842 [internal.py:wandb_internal():91] W&B internal server running at pid: 18842, started at: 2022-04-09 18:03:53.943037
--2022-04-09 18:03:53,947 INFO    MainThread:18842 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:03:53,950 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 40, 'nhead': 4, 'nlayers': 6}
--2022-04-09 18:03:53,955 INFO    MainThread:18842 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:03:53,956 INFO    MainThread:18842 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:03:53,957 INFO    WriterThread:18842 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
--2022-04-09 18:03:54,486 INFO    MainThread:18842 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:03:54,487 INFO    MainThread:18842 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:03:55,116 INFO    SenderThread:18842 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files
--2022-04-09 18:03:55,117 INFO    SenderThread:18842 [sender.py:_start_run_threads():707] run started: vjrenr4z with start time 1649507633
--2022-04-09 18:03:55,128 INFO    MainThread:18842 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:03:55,129 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/diff.patch
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/requirements.txt
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code/train_translation.py
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:03:56,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/code
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:03:56,713 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:03:56,714 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:03:56,723 INFO    MainThread:18842 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:03:56,725 INFO    MainThread:18842 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:03:56,726 INFO    MainThread:18842 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:03:56,727 INFO    MainThread:18842 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/conda-environment.yaml
--2022-04-09 18:03:57,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:57,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-metadata.json
--2022-04-09 18:03:57,913 INFO    Thread-14 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/3wu5f9t3-wandb-metadata.json
--2022-04-09 18:03:57,923 INFO    Thread-16 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/2smukmpq-diff.patch
--2022-04-09 18:03:57,930 INFO    Thread-15 :18842 [upload_job.py:push():133] Uploaded file /tmp/tmpzmoqkqw7wandb/371w3hlh-code/train_translation.py
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:03:59,117 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/config.yaml
--2022-04-09 18:04:01,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:03,118 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:09,891 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:04:10,122 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:04:11,123 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:04:29,127 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:13,420 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:14,143 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:19,611 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:20,217 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:21,219 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:41,224 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:05:45,712 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:05:46,334 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:05:47,336 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:07,341 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:12,116 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:12,343 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:06:13,344 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:35,351 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,205 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:06:39,374 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:03,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,190 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:07:07,380 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:07,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:07:09,381 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:07:29,386 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:10,500 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:11,402 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:16,774 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:17,405 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:08:37,410 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,394 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:08:43,412 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:05,419 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,197 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:09,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:09:33,430 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:09:37,630 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:09:38,434 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:01,440 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:05,442 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:06,067 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:10:06,682 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:10:07,683 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:10:31,689 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:12,362 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:12,703 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:18,664 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:18,705 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:19,707 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:37,712 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:11:41,922 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:11:42,714 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:11:43,715 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:07,721 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:11,723 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:12,130 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:12,734 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:31,739 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:35,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:36,015 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:12:36,741 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:12:55,746 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:12:59,748 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:00,307 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:00,912 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:13:01,913 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:21,919 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:13:57,112 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:13:57,932 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:03,218 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:03,934 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:21,939 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:26,986 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:27,945 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:14:47,950 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,108 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:14:53,953 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:13,958 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:17,526 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:18,140 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:40,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:15:44,461 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:15:45,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:15:46,147 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:06,158 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:16:46,382 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:47,176 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:52,592 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:16:53,194 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:16:54,197 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:12,202 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:16,743 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:17,346 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:17:18,348 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:40,354 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,098 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:17:44,357 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:06,364 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,264 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:10,365 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:38,376 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,271 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:18:42,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:18:44,377 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:04,383 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:41,321 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:41,396 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:19:47,488 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:19:48,401 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:06,406 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:11,296 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:11,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:12,408 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:34,414 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:20:39,162 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:20:39,416 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:20:40,417 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:00,422 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:04,424 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:05,166 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:05,425 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:26,433 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,050 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:21:32,675 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:21:54,681 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:37,631 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:37,700 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:22:43,843 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:22:44,765 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:22:44,766 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:02,770 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,284 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:06,892 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:32,899 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:37,389 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:23:38,007 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:23:39,009 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:23:59,017 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,019 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:03,448 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:04,073 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:27,080 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:31,880 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:24:32,082 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:24:33,083 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:24:53,088 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:32,469 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:33,103 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:38,977 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:25:39,145 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:25:41,146 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:25:59,152 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:03,262 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:04,154 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:05,155 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:33,162 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:26:38,164 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:26:38,225 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:26:39,168 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:03,173 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,175 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/output.log
--2022-04-09 18:27:07,300 INFO    SenderThread:18842 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:08,179 INFO    Thread-11 :18842 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_180353-vjrenr4z/files/wandb-summary.json
--2022-04-09 18:27:39,695 INFO    MainThread:18842 [wandb_run.py:finish():1208] finishing run tera_squid/context_enhancement/vjrenr4z
-diff --git a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb b/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb
-deleted file mode 100644
-index 2a205f7..0000000
-Binary files a/wandb/run-20220409_180353-vjrenr4z/run-vjrenr4z.wandb and /dev/null differ
-diff --git a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py b/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-deleted file mode 100644
-index f284015..0000000
---- a/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
-+++ /dev/null
-@@ -1,382 +0,0 @@
--import numpy as np
--from pathlib import Path
--import argparse
--import json
--import math
--import os
--import random
--import signal
--import subprocess
--import sys
--import time
--
--import torch
--from torch import nn, optim 
--from torch.nn import Transformer 
--import torchtext
--import t_dataset
--from t_dataset import  Translation_dataset_t
--from t_dataset import  MyCollate
--import translation_utils 
--from translation_utils import TokenEmbedding, PositionalEncoding 
--from translation_utils import create_mask
--from transformers import BertModel 
--from transformers import AutoTokenizer
--from torch import Tensor
--from torchtext.data.metrics import bleu_score
--from models import Translator
--from models import BarlowTwins
--
--import wandb 
--
--
--#import barlow
--os.environ['TRANSFORMERS_OFFLINE'] = 'yes'
--os.environ['WANDB_START_METHOD'] = 'thread'
--
--MANUAL_SEED = 4444
--
--random.seed(MANUAL_SEED)
--np.random.seed(MANUAL_SEED)
--torch.manual_seed(MANUAL_SEED)
--torch.backends.cudnn.deterministic = True
--
--
--parser = argparse.ArgumentParser(description = 'Translation') 
--
--# Training hyper-parameters: 
--parser.add_argument('--workers', default=4, type=int, metavar='N', 
--                    help='number of data loader workers') 
--parser.add_argument('--epochs', default=5, type=int, metavar='N',
--                    help='number of total epochs to run')
--parser.add_argument('--batch_size', default=4, type=int, metavar='n',
--                    help='mini-batch size')
--parser.add_argument('--learning-rate', default=0.2, type=float, metavar='LR',
--                    help='base learning rate')
--parser.add_argument('--dropout', default=0.01, type=float, metavar='d',
--                    help='dropout for training translation transformer')
--parser.add_argument('--weight-decay', default=1e-6, type=float, metavar='W',
--                    help='weight decay')
--parser.add_argument('--clip', default=1, type=float, metavar='GC',
--                    help='Gradient Clipping')
--parser.add_argument('--betas', default=(0.9, 0.98), type=tuple, metavar='B',
--                    help='betas for Adam Optimizer')
--parser.add_argument('--eps', default=1e-9, type=float, metavar='E',
--                    help='eps for Adam optimizer')
--parser.add_argument('--loss_fn', default='cross_entropy', type=str, metavar='LF',
--                    help='loss function for translation')
--
--# Transformer parameters: 
--parser.add_argument('--dmodel', default=768, type=int, metavar='T', 
--                    help='dimension of transformer encoder')
--parser.add_argument('--nhead', default=4, type= int, metavar='N', 
--                    help= 'number of heads in transformer') 
--parser.add_argument('--dfeedforward', default=256, type=int, metavar='F', 
--                    help= 'dimension of feedforward layer in transformer encoder') 
--parser.add_argument('--nlayers', default=3, type=int, metavar= 'N', 
--                   help='number of layers of transformer encoder') 
--parser.add_argument('--projector', default='768-256', type=str,
--                    metavar='MLP', help='projector MLP')
--
--# Tokenizer: 
--parser.add_argument('--tokenizer', default='bert-base-multilingual-uncased', type=str, 
--                metavar='T', help= 'tokenizer')
--parser.add_argument('--mbert-out-size', default=768, type=int, metavar='MO', 
--                    help='Dimension of mbert output')
--# Paths: 
--parser.add_argument('--checkpoint_dir', default='./checkpoint/', type=Path,
--                    metavar='DIR', help='path to checkpoint directory')
--
--# to load or barlow or not: 
--parser.add_argument('--load', default=0, type=int,
--                    metavar='DIR', help='to load barlow twins encoder or not')
--
--# calculate bleu: 
--parser.add_argument('--checkbleu', default=5 , type=int,
--                    metavar='BL', help='check bleu after these number of epochs')
--# train or test dataset
--parser.add_argument('--train', default=True , type=bool,
--                    metavar='T', help='selecting train set')
--
--parser.add_argument('--print_freq', default=5 , type=int,
--                    metavar='PF', help='frequency of printing and saving stats')
--
--parser.add_argument('--test_translation', default=0, type=int, 
--                    metavar='TT', help='testing translation_score')
--''' NOTE: 
--        Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
--'''
--
--args = parser.parse_args()
--# print(args.load)
--os.environ["TOKENIZERS_PARALLELISM"] = "true"
--
--def main(): 
--
--    # print("entered main")
--    args.ngpus_per_node = torch.cuda.device_count()
--    if 'SLURM_JOB_ID' in os.environ:
--        # single-node and multi-node distributed training on SLURM cluster
--        # requeue job on SLURM preemption
--        signal.signal(signal.SIGUSR1, handle_sigusr1)
--        signal.signal(signal.SIGTERM, handle_sigterm)
--        # find a common host name on all nodes
--        # assume scontrol returns hosts in the same order on all nodes
--        cmd = 'scontrol show hostnames ' + os.getenv('SLURM_JOB_NODELIST')
--        stdout = subprocess.check_output(cmd.split())
--        host_name = stdout.decode().splitlines()[0]
--        args.rank = int(os.getenv('SLURM_NODEID')) * args.ngpus_per_node
--        args.world_size = int(os.getenv('SLURM_NNODES')) * args.ngpus_per_node
--        args.dist_url = f'tcp://{host_name}:58472'
--    else:
--        # single-node distributed training
--        args.rank = 0
--        args.dist_url = 'tcp://localhost:58472'
--        args.world_size = args.ngpus_per_node
--    torch.multiprocessing.spawn(main_worker, (args,), args.ngpus_per_node)
--
--
--def main_worker(gpu, args):
--    
--    args.rank += gpu
--    torch.distributed.init_process_group(
--        backend='nccl', init_method=args.dist_url,
--        world_size=args.world_size, rank=args.rank)
--
--    if args.rank == 0:
--
--        wandb.init(config=args, project='translation_test')#############################################
--        wandb.config.update(args)
--        config = wandb.config
--    
--        # exit()
--        args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--        stats_file = open(args.checkpoint_dir / 'stats.txt', 'a', buffering=1)
--        print(' '.join(sys.argv))
--        print(' '.join(sys.argv), file=stats_file)
--
--    torch.cuda.set_device(gpu)
--    torch.backends.cudnn.benchmark = True
--
--    dataset = Translation_dataset_t(train=args.train) 
--    src_vocab_size = dataset.de_vocab_size
--    trg_vocab_size = dataset.en_vocab_size
--    tokenizer = dataset.tokenizer  
--    pad_idx = tokenizer.pad_token_id
--    sos_idx = tokenizer.cls_token_id 
--    eos_idx = tokenizer.sep_token_id
--
--#    transformer1 = nn.TransformerEncoderLayer(d_model = args.dmodel, nhead=args.nhead, dim_feedforward=args.dfeedforward, batch_first=True)
--    # t_enc = nn.TransformerEncoder(transformer1, num_layers=args.nlayers)
--    # print(src_vocab_size, trg_vocab_size)
--    mbert = BertModel.from_pretrained('bert-base-multilingual-uncased')
--    transformer = Transformer(d_model=args.dmodel, 
--                              nhead=args.nhead, 
--                              num_encoder_layers=args.nlayers, 
--                              num_decoder_layers = args.nlayers, 
--                              dim_feedforward=args.dfeedforward, 
--                              dropout=args.dropout)
--    model = Translator(mbert=mbert, transformer= transformer, tgt_vocab_size=trg_vocab_size, emb_size=args.mbert_out_size).cuda(gpu)
--    # print(model.state_dict)
--#    model_barlow = barlow.BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=model.transformer.encoder, lambd=args.lambd).cuda(gpu)
--
--    # args.load = False
--
--    if args.load == 1: 
--        # print(args.load)
--        # print('inside')
--        print('loading barlow model')
--        t_enc = model.transformer.encoder
--        barlow = BarlowTwins(projector_layers=args.projector, mbert_out_size=args.mbert_out_size, transformer_enc=t_enc, mbert=mbert, lambd=0.0051).cuda(gpu)
--        ### note: lambd is just a placeholder
--        ckpt = torch.load(args.checkpoint_dir/ 'barlow_checkpoint.pth', 
--                            map_location='cpu')
--        barlow.load_state_dict(ckpt['model'])
--        model.transformer.encoder = barlow.transformer_enc        
--        model.mbert = barlow.mbert
--    '''
--    to_do: 
--    if post_train: 
--        torch.load(model.states_dict)
--        model.transformer.encoder = model_barlow
--
--    '''
--#    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
--
--    param_weights = []
--    param_biases = []
--    for param in model.parameters():
--        if param.ndim == 1:
--            param_biases.append(param)
--        else:
--            param_weights.append(param)
--    parameters = [{'params': param_weights}, {'params': param_biases}]
--    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu], find_unused_parameters=True)
--
--###########################################################
--    optimizer =torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps) 
--    
--    if args.loss_fn == 'cross_entropy': 
--        loss_fn = torch.nn.CrossEntropyLoss(ignore_index=pad_idx)
--##############################################################
--
--    start_epoch = 0 
--
--    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
--
--    assert args.batch_size % args.world_size == 0
--    per_device_batch_size = args.batch_size // args.world_size
--    ###############################
--    loader = torch.utils.data.DataLoader(
--         dataset, batch_size=per_device_batch_size, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--   
--    test_loader = torch.utils.data.DataLoader(
--         dataset, batch_size=1, num_workers=args.workers,
--         pin_memory=True, sampler=sampler, collate_fn = MyCollate(tokenizer=tokenizer,bert2id_dict=dataset.bert2id_dict))
--    #############################
--    start_time = time.time()
--
--
--    if not args.test_translation: 
--
--        for epoch in range(start_epoch, args.epochs):
--            sampler.set_epoch(epoch)
--            epoch_loss = 0 
--            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--                src = sent[0].cuda(gpu, non_blocking=True)
--                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--                
--                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--                
--                optimizer.zero_grad()
--
--                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--                loss.backward()
--
--                optimizer.step()
--                # losses += loss.item()
--                
--                # wandb.log({'iter_loss': loss})
--                epoch_loss += loss.item()
--                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--                
--                if step % args.print_freq == 0:
--                    if args.rank == 0:
--                        stats = dict(epoch=epoch, step=step,
--                                    loss=loss.item(),
--                                    time=int(time.time() - start_time))
--                        print(json.dumps(stats))
--                        print(json.dumps(stats), file=stats_file)
--            if args.rank == 0:
--
--                wandb.log({"epoch_loss":epoch_loss})
--                # save checkpoint
--                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--                            optimizer=optimizer.state_dict())
--                # print(model.state_dict)
--                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--                print('translation model saved in', args.checkpoint_dir)
--            
--    ##############################################################
--            if args.rank == 0: 
--                if epoch%args.checkbleu ==0 : 
--
--                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--                    wandb.log({'bleu_score': bleu_score}) 
--    #            print(bleu_score(predicted, target))
--    ##############################################################
--    #        if epoch%1 ==0 : 
--    #            torch.save(model.module.state_dict(),
--    #                   'path.pth')
--    #            print("Model is saved")
--            # if args.rank == 0:
--            #     # save checkpoint
--            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--            #                  optimizer=optimizer.state_dict())
--            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--            #     print('saved translation model in', args.checkpoint_dir)
--        wandb.finish()
--            
--    else: 
--
--        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--        print('test_bleu_score', bleu_score)
--        if args.rank == 0: 
--            wandb.log({'bleu_score': bleu_score})
--
--
--def checkbleu(model, tokenizer, test_loader, gpu): 
--
--    model.eval()
--    predicted=[]
--    target=[]
--            
--    for i in test_loader: 
--        src = i[0].cuda(gpu, non_blocking=True)
--        tgt_out = i[3].cuda(gpu, non_blocking=True)
--        num_tokens = src.shape[0]
--
--        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--        out = translate(model, src, tokenizer, src_mask, gpu)
--        predicted.append(out)
--        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                
--        try: 
--            bleu_score(predicted, target)
--        except: 
--            predicted.pop()
--            target.pop()
--            
--        bleu = bleu_score(predicted, target)
--
--    return bleu
--
--'''
--todo: 
--    BLEU score
--'''
--
--# function to generate output sequence using greedy algorithm 
--def greedy_decode(model, src, src_mask, max_len, start_symbol, eos_idx, gpu):
--    src = src
--    src_mask = src_mask
--
--    memory = model.module.encode(src, src_mask)
--    ys = torch.ones(1, 1).fill_(start_symbol).type(torch.long).cuda(gpu, non_blocking=True)
--    for i in range(max_len-1):
--        memory = memory
--        tgt_mask = (translation_utils.generate_square_subsequent_mask(ys.size(0))
--                    .type(torch.bool)).cuda(gpu, non_blocking=True)
--        out = model.module.decode(ys, memory, tgt_mask)
--        out = out.transpose(0, 1)
--        prob = model.module.generator(out[:, -1])
--        _, next_word = torch.max(prob, dim=1)
--        next_word = next_word.item()
--
--        ys = torch.cat([ys,
--                        torch.ones(1, 1).type_as(src.data).fill_(next_word)], dim=0)
--        if next_word == eos_idx:
--            break
--    return ys
--
--
--# actual function to translate input sentence into target language
--def translate(model: torch.nn.Module, 
--        src: torch.tensor, 
--        tokenizer,src_mask, gpu):
--    model.eval()
--    
--    num_tokens = src.shape[0]
--    
--    
--    tgt_tokens = greedy_decode(
--        model,  src, src_mask, max_len=num_tokens + 5, start_symbol=tokenizer.cls_token_id, eos_idx=tokenizer.sep_token_id, gpu=gpu).flatten()
--    return tokenizer.convert_ids_to_tokens(tgt_tokens) 
--
--
--if __name__ == '__main__': 
--    main()
--    wandb.finish()
-diff --git a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml b/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-deleted file mode 100644
-index 72eed10..0000000
---- a/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-+++ /dev/null
-@@ -1,124 +0,0 @@
--name: ectc
--channels:
--  - pytorch
--  - defaults
--dependencies:
--  - _libgcc_mutex=0.1=main
--  - _openmp_mutex=4.5=1_gnu
--  - blas=1.0=mkl
--  - brotlipy=0.7.0=py37h27cfd23_1003
--  - bzip2=1.0.8=h7b6447c_0
--  - ca-certificates=2022.3.18=h06a4308_0
--  - certifi=2021.10.8=py37h06a4308_2
--  - cffi=1.15.0=py37hd667e15_1
--  - cryptography=36.0.0=py37h9ce1e76_0
--  - cudatoolkit=11.3.1=h2bc3f7f_2
--  - ffmpeg=4.3=hf484d3e_0
--  - freetype=2.11.0=h70c0345_0
--  - giflib=5.2.1=h7b6447c_0
--  - gmp=6.2.1=h2531618_2
--  - gnutls=3.6.15=he1e5248_0
--  - idna=3.3=pyhd3eb1b0_0
--  - intel-openmp=2021.4.0=h06a4308_3561
--  - jpeg=9d=h7f8727e_0
--  - lame=3.100=h7b6447c_0
--  - lcms2=2.12=h3be6417_0
--  - ld_impl_linux-64=2.35.1=h7274673_9
--  - libffi=3.3=he6710b0_2
--  - libgcc-ng=9.3.0=h5101ec6_17
--  - libgomp=9.3.0=h5101ec6_17
--  - libiconv=1.15=h63c8f33_5
--  - libidn2=2.3.2=h7f8727e_0
--  - libpng=1.6.37=hbc83047_0
--  - libstdcxx-ng=9.3.0=hd4cf53a_17
--  - libtasn1=4.16.0=h27cfd23_0
--  - libtiff=4.2.0=h85742a9_0
--  - libunistring=0.9.10=h27cfd23_0
--  - libuv=1.40.0=h7b6447c_0
--  - libwebp=1.2.2=h55f646e_0
--  - libwebp-base=1.2.2=h7f8727e_0
--  - lz4-c=1.9.3=h295c915_1
--  - mkl=2021.4.0=h06a4308_640
--  - mkl-service=2.4.0=py37h7f8727e_0
--  - mkl_fft=1.3.1=py37hd3c417c_0
--  - mkl_random=1.2.2=py37h51133e4_0
--  - ncurses=6.3=h7f8727e_2
--  - nettle=3.7.3=hbbd107a_1
--  - numpy-base=1.21.2=py37h79a1101_0
--  - openh264=2.1.1=h4ff587b_0
--  - openssl=1.1.1n=h7f8727e_0
--  - pip=21.2.2=py37h06a4308_0
--  - pycparser=2.21=pyhd3eb1b0_0
--  - pyopenssl=22.0.0=pyhd3eb1b0_0
--  - pysocks=1.7.1=py37_1
--  - python=3.7.11=h12debd9_0
--  - pytorch=1.11.0=py3.7_cuda11.3_cudnn8.2.0_0
--  - pytorch-mutex=1.0=cuda
--  - readline=8.1.2=h7f8727e_1
--  - requests=2.27.1=pyhd3eb1b0_0
--  - setuptools=58.0.4=py37h06a4308_0
--  - six=1.16.0=pyhd3eb1b0_1
--  - sqlite=3.38.0=hc218d9a_0
--  - tk=8.6.11=h1ccaba5_0
--  - torchaudio=0.11.0=py37_cu113
--  - typing_extensions=4.1.1=pyh06a4308_0
--  - wheel=0.37.1=pyhd3eb1b0_0
--  - xz=5.2.5=h7b6447c_0
--  - zlib=1.2.11=h7f8727e_4
--  - zstd=1.4.9=haebb681_0
--  - pip:
--    - aiohttp==3.8.1
--    - aiosignal==1.2.0
--    - async-timeout==4.0.2
--    - asynctest==0.13.0
--    - attrs==21.4.0
--    - blessings==1.7
--    - charset-normalizer==2.0.12
--    - click==8.0.4
--    - configparser==5.2.0
--    - datasets==1.16.1
--    - dill==0.3.4
--    - docker-pycreds==0.4.0
--    - filelock==3.6.0
--    - frozenlist==1.3.0
--    - fsspec==2022.2.0
--    - gitdb==4.0.9
--    - gitpython==3.1.27
--    - gpustat==0.6.0
--    - huggingface-hub==0.4.0
--    - importlib-metadata==4.11.3
--    - joblib==1.1.0
--    - multidict==6.0.2
--    - multiprocess==0.70.12.2
--    - numpy==1.21.5
--    - nvidia-ml-py3==7.352.0
--    - packaging==21.3
--    - pandas==1.3.5
--    - pathtools==0.1.2
--    - pillow==9.0.1
--    - promise==2.3
--    - protobuf==3.19.4
--    - psutil==5.9.0
--    - pyarrow==7.0.0
--    - pyparsing==3.0.7
--    - python-dateutil==2.8.2
--    - pytz==2022.1
--    - pyyaml==6.0
--    - regex==2022.3.15
--    - sacremoses==0.0.49
--    - sentry-sdk==1.5.8
--    - shortuuid==1.0.8
--    - smmap==5.0.0
--    - subprocess32==3.5.4
--    - tokenizers==0.10.3
--    - torch==1.11.0
--    - torchtext==0.12.0
--    - torchvision==0.9.1
--    - tqdm==4.63.1
--    - transformers==4.14.1
--    - urllib3==1.26.9
--    - wandb==0.10.31
--    - xxhash==3.0.0
--    - yarl==1.7.2
--    - zipp==3.7.0
--prefix: /home/ivlabs/miniconda3/envs/ectc
-diff --git a/wandb/run-20220409_182749-paufev36/files/config.yaml b/wandb/run-20220409_182749-paufev36/files/config.yaml
-deleted file mode 100644
-index c4a0d20..0000000
---- a/wandb/run-20220409_182749-paufev36/files/config.yaml
-+++ /dev/null
-@@ -1,104 +0,0 @@
--wandb_version: 1
--
--_wandb:
--  desc: null
--  value:
--    cli_version: 0.10.31
--    code_path: code/train_translation.py
--    framework: huggingface
--    huggingface_version: 4.14.1
--    is_jupyter_run: false
--    is_kaggle_kernel: false
--    python_version: 3.7.11
--    t:
--      1:
--      - 1
--      - 11
--      4: 3.7.11
--      5: 0.10.31
--      6: 4.14.1
--      8:
--      - 8
--batch_size:
--  desc: null
--  value: 32
--betas:
--  desc: null
--  value:
--  - 0.9
--  - 0.98
--checkbleu:
--  desc: null
--  value: 5
--checkpoint_dir:
--  desc: null
--  value: checkpoint
--clip:
--  desc: null
--  value: 1
--dfeedforward:
--  desc: null
--  value: 1024
--dist_url:
--  desc: null
--  value: tcp://localhost:58472
--dmodel:
--  desc: null
--  value: 768
--dropout:
--  desc: null
--  value: 0.01
--epochs:
--  desc: null
--  value: 32
--eps:
--  desc: null
--  value: 1.0e-09
--learning_rate:
--  desc: null
--  value: 0.2
--load:
--  desc: null
--  value: 0
--loss_fn:
--  desc: null
--  value: cross_entropy
--mbert_out_size:
--  desc: null
--  value: 768
--ngpus_per_node:
--  desc: null
--  value: 2
--nhead:
--  desc: null
--  value: 2
--nlayers:
--  desc: null
--  value: 4
--print_freq:
--  desc: null
--  value: 5
--projector:
--  desc: null
--  value: 768-256
--rank:
--  desc: null
--  value: 0
--test_translation:
--  desc: null
--  value: 0
--tokenizer:
--  desc: null
--  value: bert-base-multilingual-uncased
--train:
--  desc: null
--  value: true
--weight_decay:
--  desc: null
--  value: 1.0e-06
--workers:
--  desc: null
--  value: 4
--world_size:
--  desc: null
--  value: 2
-diff --git a/wandb/run-20220409_182749-paufev36/files/diff.patch b/wandb/run-20220409_182749-paufev36/files/diff.patch
-deleted file mode 100644
-index 17f6c34..0000000
---- a/wandb/run-20220409_182749-paufev36/files/diff.patch
-+++ /dev/null
-@@ -1,694 +0,0 @@
--diff --git a/__pycache__/barlow.cpython-37.pyc b/__pycache__/barlow.cpython-37.pyc
--index d9b3757..420c21a 100644
--Binary files a/__pycache__/barlow.cpython-37.pyc and b/__pycache__/barlow.cpython-37.pyc differ
--diff --git a/__pycache__/train_translation.cpython-37.pyc b/__pycache__/train_translation.cpython-37.pyc
--index 7bf3ea7..b5b1fb5 100644
--Binary files a/__pycache__/train_translation.cpython-37.pyc and b/__pycache__/train_translation.cpython-37.pyc differ
--diff --git a/barlow.py b/barlow.py
--index 99b0da9..b20d671 100644
----- a/barlow.py
--+++ b/barlow.py
--@@ -265,13 +265,6 @@ def main_worker(gpu, args):
--                          optimizer=optimizer.state_dict())
--             torch.save(state, args.checkpoint_dir / 'barlow_checkpoint.pth')
--             print('barlow model saved in', args.checkpoint_dir)
---            for sent in test_loader: 
---                y1 = sent[0].cuda(gpu, non_blocking=True)
---                y2 = sent[1].cuda(gpu, non_blocking=True)
---                model.eval()
---                c, _ = model(y1, y2)
---                xlabels = tokenizer.convert_ids_to_tokens(y2)
---                ylabels = tokenizer.convert_ids_to_tokens(y1)
-- #    wandb.finish()
-- #    if args.rank == 0:
-- #        save final model
--diff --git a/checkpoint/stats.txt b/checkpoint/stats.txt
--index 97f9eb6..e8bd4e3 100644
----- a/checkpoint/stats.txt
--+++ b/checkpoint/stats.txt
--@@ -467,3 +467,362 @@ train_translation.py
-- {"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
-- {"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
-- {"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 133}
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 6}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 7}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 7}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 8}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 8}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 9}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 9}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 60}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 61}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 61}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 62}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 63}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 63}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 64}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 64}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 65}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 85}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 85}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 86}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 86}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 87}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 88}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 88}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 89}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 89}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 106}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 106}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 107}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 108}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 108}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 109}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 109}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 110}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 110}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 129}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 130}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 130}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 131}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 131}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 132}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 132}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 133}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 134}
--+/home/ivlabs/context_enhancement/context_enhancement/barlow.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 1
--+/home/ivlabs/context_enhancement/context_enhancement/train_translation.py --load 0
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=28 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120170593261719, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 151.9119415283203, "time": 8}
--+{"epoch": 1, "step": 10, "loss": 112.8124008178711, "time": 84}
--+{"epoch": 2, "step": 15, "loss": 47.12509536743164, "time": 111}
--+{"epoch": 3, "step": 20, "loss": 45.04984664916992, "time": 139}
--+{"epoch": 4, "step": 25, "loss": 38.9657096862793, "time": 165}
--+{"epoch": 5, "step": 30, "loss": 60.226715087890625, "time": 190}
--+{"epoch": 5, "step": 35, "loss": 65.24925231933594, "time": 192}
--+{"epoch": 6, "step": 40, "loss": 65.57554626464844, "time": 268}
--+{"epoch": 7, "step": 45, "loss": 61.62765121459961, "time": 294}
--+{"epoch": 8, "step": 50, "loss": 64.9477310180664, "time": 319}
--+{"epoch": 9, "step": 55, "loss": 72.8912353515625, "time": 344}
--+{"epoch": 10, "step": 60, "loss": 86.97362518310547, "time": 369}
--+{"epoch": 10, "step": 65, "loss": 112.7873306274414, "time": 372}
--+{"epoch": 11, "step": 70, "loss": 88.19213104248047, "time": 447}
--+{"epoch": 12, "step": 75, "loss": 73.24372863769531, "time": 472}
--+{"epoch": 13, "step": 80, "loss": 73.8764419555664, "time": 498}
--+{"epoch": 14, "step": 85, "loss": 87.44139099121094, "time": 525}
--+{"epoch": 15, "step": 90, "loss": 66.60698699951172, "time": 551}
--+{"epoch": 15, "step": 95, "loss": 80.11738586425781, "time": 553}
--+{"epoch": 16, "step": 100, "loss": 88.93124389648438, "time": 624}
--+{"epoch": 17, "step": 105, "loss": 74.59225463867188, "time": 649}
--+{"epoch": 18, "step": 110, "loss": 108.9293441772461, "time": 675}
--+{"epoch": 19, "step": 115, "loss": 87.63671112060547, "time": 700}
--+{"epoch": 20, "step": 120, "loss": 99.23358154296875, "time": 725}
--+{"epoch": 20, "step": 125, "loss": 118.16622924804688, "time": 727}
--+{"epoch": 21, "step": 130, "loss": 102.9515380859375, "time": 801}
--+{"epoch": 22, "step": 135, "loss": 80.40345764160156, "time": 827}
--+{"epoch": 23, "step": 140, "loss": 87.99221801757812, "time": 852}
--+{"epoch": 24, "step": 145, "loss": 63.2794303894043, "time": 876}
--+{"epoch": 25, "step": 150, "loss": 78.17864227294922, "time": 902}
--+{"epoch": 25, "step": 155, "loss": 100.8608169555664, "time": 904}
--+{"epoch": 26, "step": 160, "loss": 88.68865203857422, "time": 976}
--+{"epoch": 27, "step": 165, "loss": 84.6174087524414, "time": 1002}
--+train_translation.py --batch_size=256 --dfeedforward=512 --epochs=32 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.139744758605957, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=36 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 6}
--+train_translation.py --batch_size=16 --dfeedforward=1024 --epochs=32 --nhead=6 --nlayers=2
--+{"epoch": 0, "step": 0, "loss": 7.180241584777832, "time": 5}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=20 --nhead=8 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.120020389556885, "time": 6}
--+train_translation.py --batch_size=64 --dfeedforward=512 --epochs=32 --nhead=2 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.082856178283691, "time": 6}
--+train_translation.py --batch_size=128 --dfeedforward=512 --epochs=16 --nhead=6 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140233993530273, "time": 6}
--+train_translation.py --batch_size=256 --dfeedforward=256 --epochs=40 --nhead=6 --nlayers=2
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 6}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 6}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 54}
--+{"epoch": 1, "step": 50, "loss": 83.65938568115234, "time": 54}
--+{"epoch": 1, "step": 55, "loss": 121.91883850097656, "time": 54}
--+{"epoch": 1, "step": 60, "loss": 69.18376159667969, "time": 55}
--+{"epoch": 1, "step": 65, "loss": 108.74915313720703, "time": 55}
--+{"epoch": 1, "step": 70, "loss": 72.30828094482422, "time": 55}
--+{"epoch": 1, "step": 75, "loss": 71.81124114990234, "time": 56}
--+{"epoch": 1, "step": 80, "loss": 68.34536743164062, "time": 56}
--+{"epoch": 1, "step": 85, "loss": 60.85449981689453, "time": 56}
--+{"epoch": 2, "step": 90, "loss": 77.74386596679688, "time": 77}
--+{"epoch": 2, "step": 95, "loss": 96.72307586669922, "time": 77}
--+{"epoch": 2, "step": 100, "loss": 101.80294036865234, "time": 77}
--+{"epoch": 2, "step": 105, "loss": 84.51009368896484, "time": 78}
--+{"epoch": 2, "step": 110, "loss": 72.72525787353516, "time": 78}
--+{"epoch": 2, "step": 115, "loss": 74.45042419433594, "time": 78}
--+{"epoch": 2, "step": 120, "loss": 67.41654968261719, "time": 79}
--+{"epoch": 2, "step": 125, "loss": 78.1681137084961, "time": 79}
--+{"epoch": 2, "step": 130, "loss": 92.35138702392578, "time": 79}
--+{"epoch": 3, "step": 135, "loss": 67.62174224853516, "time": 97}
--+{"epoch": 3, "step": 140, "loss": 73.0427017211914, "time": 97}
--+{"epoch": 3, "step": 145, "loss": 105.50846099853516, "time": 98}
--+{"epoch": 3, "step": 150, "loss": 80.58209991455078, "time": 98}
--+{"epoch": 3, "step": 155, "loss": 93.44019317626953, "time": 98}
--+{"epoch": 3, "step": 160, "loss": 89.55480194091797, "time": 99}
--+{"epoch": 3, "step": 165, "loss": 105.64498138427734, "time": 99}
--+{"epoch": 3, "step": 170, "loss": 114.21644592285156, "time": 99}
--+{"epoch": 3, "step": 175, "loss": 132.64865112304688, "time": 100}
--+{"epoch": 4, "step": 180, "loss": 123.47101593017578, "time": 116}
--+{"epoch": 4, "step": 185, "loss": 98.48711395263672, "time": 117}
--+{"epoch": 4, "step": 190, "loss": 106.57389831542969, "time": 117}
--+{"epoch": 4, "step": 195, "loss": 123.41980743408203, "time": 118}
--+{"epoch": 4, "step": 200, "loss": 133.0455322265625, "time": 118}
--+{"epoch": 4, "step": 205, "loss": 115.12477111816406, "time": 118}
--+{"epoch": 4, "step": 210, "loss": 173.08377075195312, "time": 119}
--+{"epoch": 4, "step": 215, "loss": 95.62724304199219, "time": 119}
--+{"epoch": 4, "step": 220, "loss": 146.6149444580078, "time": 119}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 5}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 7}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+train_translation.py --load 0
--+{"epoch": 0, "step": 0, "loss": 7.142178058624268, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 69.92982482910156, "time": 5}
--+{"epoch": 0, "step": 10, "loss": 187.95425415039062, "time": 5}
--+{"epoch": 0, "step": 15, "loss": 116.46453094482422, "time": 5}
--+{"epoch": 0, "step": 20, "loss": 103.49996948242188, "time": 6}
--+{"epoch": 0, "step": 25, "loss": 109.99765014648438, "time": 6}
--+{"epoch": 0, "step": 30, "loss": 82.7474365234375, "time": 6}
--+{"epoch": 0, "step": 35, "loss": 81.3102798461914, "time": 7}
--+{"epoch": 0, "step": 40, "loss": 68.49085235595703, "time": 7}
--+{"epoch": 1, "step": 45, "loss": 83.40009307861328, "time": 55}
--+{"epoch": 1, "step": 50, "loss": 83.36439514160156, "time": 55}
--+{"epoch": 1, "step": 55, "loss": 117.81816101074219, "time": 56}
--+{"epoch": 1, "step": 60, "loss": 70.09979248046875, "time": 56}
--+{"epoch": 1, "step": 65, "loss": 90.87323760986328, "time": 57}
--+{"epoch": 1, "step": 70, "loss": 60.27517318725586, "time": 57}
--+{"epoch": 1, "step": 75, "loss": 99.74661254882812, "time": 57}
--+{"epoch": 1, "step": 80, "loss": 76.57121276855469, "time": 58}
--+{"epoch": 1, "step": 85, "loss": 85.32162475585938, "time": 58}
--+{"epoch": 2, "step": 90, "loss": 79.57125091552734, "time": 104}
--+{"epoch": 2, "step": 95, "loss": 145.4536590576172, "time": 104}
--+{"epoch": 2, "step": 100, "loss": 72.27653503417969, "time": 105}
--+{"epoch": 2, "step": 105, "loss": 90.55571746826172, "time": 105}
--+{"epoch": 2, "step": 110, "loss": 83.55565643310547, "time": 105}
--+{"epoch": 2, "step": 115, "loss": 61.579551696777344, "time": 106}
--+{"epoch": 2, "step": 120, "loss": 98.33128356933594, "time": 107}
--+{"epoch": 2, "step": 125, "loss": 128.28770446777344, "time": 107}
--+{"epoch": 2, "step": 130, "loss": 82.06121063232422, "time": 108}
--+{"epoch": 3, "step": 135, "loss": 78.25971221923828, "time": 128}
--+{"epoch": 3, "step": 140, "loss": 75.09734344482422, "time": 128}
--+{"epoch": 3, "step": 145, "loss": 109.36125183105469, "time": 128}
--+{"epoch": 3, "step": 150, "loss": 102.68833923339844, "time": 129}
--+{"epoch": 3, "step": 155, "loss": 102.20543670654297, "time": 129}
--+{"epoch": 3, "step": 160, "loss": 98.07948303222656, "time": 129}
--+{"epoch": 3, "step": 165, "loss": 99.76647186279297, "time": 130}
--+{"epoch": 3, "step": 170, "loss": 98.70307159423828, "time": 130}
--+{"epoch": 3, "step": 175, "loss": 102.44486999511719, "time": 131}
--+{"epoch": 4, "step": 180, "loss": 101.29882049560547, "time": 150}
--+{"epoch": 4, "step": 185, "loss": 113.0394287109375, "time": 150}
--+{"epoch": 4, "step": 190, "loss": 102.2679214477539, "time": 150}
--+{"epoch": 4, "step": 195, "loss": 88.9566650390625, "time": 151}
--+{"epoch": 4, "step": 200, "loss": 80.84623718261719, "time": 151}
--+{"epoch": 4, "step": 205, "loss": 173.88238525390625, "time": 151}
--+{"epoch": 4, "step": 210, "loss": 138.01107788085938, "time": 152}
--+{"epoch": 4, "step": 215, "loss": 116.2401351928711, "time": 152}
--+{"epoch": 4, "step": 220, "loss": 119.53892517089844, "time": 153}
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py --load 0 --test_translation 1
--+train_translation.py
--+{"epoch": 0, "step": 0, "loss": 7.122797966003418, "time": 4}
--+{"epoch": 0, "step": 5, "loss": 198.62460327148438, "time": 4}
--+{"epoch": 0, "step": 10, "loss": 119.2225341796875, "time": 4}
--+{"epoch": 0, "step": 15, "loss": 89.00521850585938, "time": 4}
--+{"epoch": 0, "step": 20, "loss": 144.1955108642578, "time": 5}
--+{"epoch": 0, "step": 25, "loss": 126.2806396484375, "time": 5}
--+{"epoch": 0, "step": 30, "loss": 115.61041259765625, "time": 5}
--+{"epoch": 0, "step": 35, "loss": 84.10115814208984, "time": 5}
--+{"epoch": 0, "step": 40, "loss": 65.00213623046875, "time": 5}
--+{"epoch": 1, "step": 45, "loss": 79.53411865234375, "time": 75}
--+{"epoch": 1, "step": 50, "loss": 81.8320541381836, "time": 75}
--+{"epoch": 1, "step": 55, "loss": 97.07718658447266, "time": 75}
--+{"epoch": 1, "step": 60, "loss": 77.87088012695312, "time": 75}
--+{"epoch": 1, "step": 65, "loss": 91.45843505859375, "time": 75}
--+{"epoch": 1, "step": 70, "loss": 81.77067565917969, "time": 76}
--+{"epoch": 1, "step": 75, "loss": 93.20482635498047, "time": 76}
--+{"epoch": 1, "step": 80, "loss": 96.80836486816406, "time": 76}
--+{"epoch": 1, "step": 85, "loss": 99.4000473022461, "time": 76}
--+{"epoch": 2, "step": 90, "loss": 84.4419174194336, "time": 95}
--+{"epoch": 2, "step": 95, "loss": 89.35089111328125, "time": 95}
--+{"epoch": 2, "step": 100, "loss": 70.36296081542969, "time": 96}
--+{"epoch": 2, "step": 105, "loss": 93.40479278564453, "time": 96}
--+{"epoch": 2, "step": 110, "loss": 85.92987823486328, "time": 96}
--+{"epoch": 2, "step": 115, "loss": 84.89830780029297, "time": 96}
--+{"epoch": 2, "step": 120, "loss": 88.87590789794922, "time": 96}
--+{"epoch": 2, "step": 125, "loss": 89.31674194335938, "time": 96}
--+{"epoch": 2, "step": 130, "loss": 114.93965911865234, "time": 97}
--+{"epoch": 3, "step": 135, "loss": 76.80366516113281, "time": 115}
--+{"epoch": 3, "step": 140, "loss": 140.8549346923828, "time": 115}
--+{"epoch": 3, "step": 145, "loss": 113.339111328125, "time": 116}
--+{"epoch": 3, "step": 150, "loss": 93.06966400146484, "time": 116}
--+{"epoch": 3, "step": 155, "loss": 113.3215103149414, "time": 116}
--+{"epoch": 3, "step": 160, "loss": 109.3653335571289, "time": 116}
--+{"epoch": 3, "step": 165, "loss": 139.5435333251953, "time": 116}
--+{"epoch": 3, "step": 170, "loss": 76.41168975830078, "time": 117}
--+{"epoch": 3, "step": 175, "loss": 132.55953979492188, "time": 117}
--+{"epoch": 4, "step": 180, "loss": 109.78890228271484, "time": 143}
--+{"epoch": 4, "step": 185, "loss": 88.3539810180664, "time": 143}
--+{"epoch": 4, "step": 190, "loss": 113.5445327758789, "time": 144}
--+{"epoch": 4, "step": 195, "loss": 107.1954345703125, "time": 144}
--+{"epoch": 4, "step": 200, "loss": 127.9149398803711, "time": 144}
--+{"epoch": 4, "step": 205, "loss": 131.3365936279297, "time": 144}
--+{"epoch": 4, "step": 210, "loss": 129.23558044433594, "time": 145}
--+{"epoch": 4, "step": 215, "loss": 86.24095153808594, "time": 145}
--+{"epoch": 4, "step": 220, "loss": 143.04344177246094, "time": 145}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 679.4036254882812, "time": 10}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+barlow.py
--+{"epoch": 0, "step": 0, "lr_weights": 0.0, "lr_biases": 0.0, "loss": 456.90240478515625, "time": 8}
--+train_translation.py --batch_size=128 --dfeedforward=1024 --epochs=24 --nhead=4 --nlayers=4
--+{"epoch": 0, "step": 0, "loss": 7.140841484069824, "time": 5}
--+{"epoch": 2, "step": 5, "loss": 253.87469482421875, "time": 74}
--+{"epoch": 5, "step": 10, "loss": 150.13229370117188, "time": 139}
--+{"epoch": 7, "step": 15, "loss": 106.13131713867188, "time": 216}
--+{"epoch": 10, "step": 20, "loss": 77.7083511352539, "time": 285}
--+{"epoch": 12, "step": 25, "loss": 74.31400299072266, "time": 365}
--+{"epoch": 15, "step": 30, "loss": 74.50468444824219, "time": 432}
--+{"epoch": 17, "step": 35, "loss": 62.94711685180664, "time": 515}
--+{"epoch": 20, "step": 40, "loss": 59.828826904296875, "time": 583}
--+{"epoch": 22, "step": 45, "loss": 62.49226379394531, "time": 663}
--+train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=40 --nhead=4 --nlayers=6
--+{"epoch": 0, "step": 0, "loss": 7.117185592651367, "time": 5}
--+{"epoch": 0, "step": 5, "loss": 240.16217041015625, "time": 6}
--+{"epoch": 1, "step": 10, "loss": 155.1521453857422, "time": 76}
--+{"epoch": 2, "step": 15, "loss": 137.45753479003906, "time": 101}
--+{"epoch": 3, "step": 20, "loss": 117.7391357421875, "time": 127}
--+{"epoch": 4, "step": 25, "loss": 71.79619598388672, "time": 154}
--+{"epoch": 5, "step": 30, "loss": 74.55005645751953, "time": 182}
--+{"epoch": 5, "step": 35, "loss": 71.86864471435547, "time": 183}
--+{"epoch": 6, "step": 40, "loss": 67.3455810546875, "time": 253}
--+{"epoch": 7, "step": 45, "loss": 85.43989562988281, "time": 279}
--+{"epoch": 8, "step": 50, "loss": 85.58329772949219, "time": 305}
--+{"epoch": 9, "step": 55, "loss": 75.13690948486328, "time": 333}
--+{"epoch": 10, "step": 60, "loss": 99.44623565673828, "time": 361}
--+{"epoch": 10, "step": 65, "loss": 92.4845962524414, "time": 362}
--+{"epoch": 11, "step": 70, "loss": 70.49784851074219, "time": 435}
--+{"epoch": 12, "step": 75, "loss": 106.4268569946289, "time": 458}
--+{"epoch": 13, "step": 80, "loss": 66.5932388305664, "time": 487}
--+{"epoch": 14, "step": 85, "loss": 88.70879364013672, "time": 511}
--+{"epoch": 15, "step": 90, "loss": 81.76454162597656, "time": 535}
--+{"epoch": 15, "step": 95, "loss": 56.718807220458984, "time": 536}
--+{"epoch": 16, "step": 100, "loss": 73.56828308105469, "time": 599}
--+{"epoch": 17, "step": 105, "loss": 87.1954116821289, "time": 623}
--+{"epoch": 18, "step": 110, "loss": 81.27310180664062, "time": 649}
--+{"epoch": 19, "step": 115, "loss": 118.82411193847656, "time": 673}
--+{"epoch": 20, "step": 120, "loss": 104.59524536132812, "time": 699}
--+{"epoch": 20, "step": 125, "loss": 91.45010375976562, "time": 701}
--+{"epoch": 21, "step": 130, "loss": 96.45476531982422, "time": 768}
--+{"epoch": 22, "step": 135, "loss": 73.63231658935547, "time": 792}
--+{"epoch": 23, "step": 140, "loss": 81.41030883789062, "time": 820}
--+{"epoch": 24, "step": 145, "loss": 68.5522232055664, "time": 845}
--+{"epoch": 25, "step": 150, "loss": 87.08369445800781, "time": 877}
--+{"epoch": 25, "step": 155, "loss": 60.33863830566406, "time": 878}
--+{"epoch": 26, "step": 160, "loss": 90.980224609375, "time": 943}
--+{"epoch": 27, "step": 165, "loss": 89.83417510986328, "time": 967}
--+{"epoch": 28, "step": 170, "loss": 59.04204177856445, "time": 995}
--+{"epoch": 29, "step": 175, "loss": 76.57648468017578, "time": 1020}
--+{"epoch": 30, "step": 180, "loss": 79.04066467285156, "time": 1047}
--+{"epoch": 30, "step": 185, "loss": 116.04915618896484, "time": 1048}
--+{"epoch": 31, "step": 190, "loss": 96.91857147216797, "time": 1120}
--+{"epoch": 32, "step": 195, "loss": 117.3604965209961, "time": 1142}
--+{"epoch": 33, "step": 200, "loss": 79.40359497070312, "time": 1173}
--+{"epoch": 34, "step": 205, "loss": 118.38796997070312, "time": 1199}
--+{"epoch": 35, "step": 210, "loss": 100.85802459716797, "time": 1227}
--+{"epoch": 35, "step": 215, "loss": 127.6283187866211, "time": 1228}
--+{"epoch": 36, "step": 220, "loss": 107.0147705078125, "time": 1295}
--+{"epoch": 37, "step": 225, "loss": 101.71541595458984, "time": 1319}
--+{"epoch": 38, "step": 230, "loss": 109.91344451904297, "time": 1354}
--+{"epoch": 39, "step": 235, "loss": 91.43553924560547, "time": 1382}
--diff --git a/sweep.yaml b/sweep.yaml
--index 6402430..ae76056 100644
----- a/sweep.yaml
--+++ b/sweep.yaml
--@@ -1,17 +1,20 @@
---program: main.py
--+program: train_translation.py
-- method: bayes
-- metric: 
--     name: epoch_loss
--     goal: minimize
-- 
---description: 'trial2 learning q distributions' 
--+description: 'translation sweep' 
-- parameters: 
-- 
---    lambd: 
--+    epochs:
--         distribution: 'q_uniform'
---        min: 0
---        max: 1
---        q: 0.05
--+        min: 10
--+        max: 40
--+        q: 4
--+
--+    batch_size: 
--+        values: [16, 32, 64, 128, 256]
--     
--     nhead:
--         distribution: 'q_uniform'
--@@ -19,6 +22,9 @@ parameters:
--         max: 8
--         q: 2
-- 
--+    dfeedforward:
--+        values: [ 256, 512, 1024]
--+
--     nlayers:
--         distribution: 'q_uniform'
--         min: 2
--@@ -26,6 +32,6 @@ parameters:
--         q: 2
-- 
--     
---
--+ # to add: lr, dropout, betas, loss_fn  
-- 
-- 
--diff --git a/test_translation.py b/test_translation.py
--index 67aad1e..47a6ecd 100644
----- a/test_translation.py
--+++ b/test_translation.py
--@@ -5,13 +5,20 @@ import os
-- 
-- 
-- # translation pretraining 
--+# sweep translation 
--+# wandb sweep_translation.yaml 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
-- 
-- # context enhancement
--+# sweep barlow with translation encoder hyper-params 
--+# sweep sweep_barlow.yaml
-- os.system('python ~/context_enhancement/context_enhancement/barlow.py --load 1') 
-- 
-- # tranining translation
--+#train translation  with translation hyper-params
--+#python train_translation.py 
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 1')
-- 
-- # testing translation
--+# no need
-- os.system('python ~/context_enhancement/context_enhancement/train_translation.py --load 0')
--diff --git a/train_translation.py b/train_translation.py
--index 1b0fe42..f284015 100644
----- a/train_translation.py
--+++ b/train_translation.py
--@@ -101,6 +101,8 @@ parser.add_argument('--train', default=True , type=bool,
-- parser.add_argument('--print_freq', default=5 , type=int,
--                     metavar='PF', help='frequency of printing and saving stats')
-- 
--+parser.add_argument('--test_translation', default=0, type=int, 
--+                    metavar='TT', help='testing translation_score')
-- ''' NOTE: 
--         Transformer and tokenizer arguments would remain constant in training and context enhancement step.  
-- '''
--@@ -143,9 +145,9 @@ def main_worker(gpu, args):
-- 
--     if args.rank == 0:
-- 
---#        wandb.init(config=args, project='translation_test')#############################################
---#        wandb.config.update(args)
---#        config = wandb.config
--+        wandb.init(config=args, project='translation_test')#############################################
--+        wandb.config.update(args)
--+        config = wandb.config
--     
--         # exit()
--         args.checkpoint_dir.mkdir(parents=True, exist_ok=True)
--@@ -236,84 +238,101 @@ def main_worker(gpu, args):
--     start_time = time.time()
-- 
-- 
---    
---    for epoch in range(start_epoch, args.epochs):
---        sampler.set_epoch(epoch)
---        epoch_loss = 0 
---        for step, (sent) in enumerate(loader, start=epoch * len(loader)):
---            src = sent[0].cuda(gpu, non_blocking=True)
---            tgt_inp = sent[2].cuda(gpu, non_blocking=True)
---            tgt_out = sent[3].cuda(gpu, non_blocking=True)
---            
---            src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
---            logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
---            
---            optimizer.zero_grad()
--+    if not args.test_translation: 
-- 
---            loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
---            loss.backward()
--+        for epoch in range(start_epoch, args.epochs):
--+            sampler.set_epoch(epoch)
--+            epoch_loss = 0 
--+            for step, (sent) in enumerate(loader, start=epoch * len(loader)):
--+                src = sent[0].cuda(gpu, non_blocking=True)
--+                tgt_inp = sent[2].cuda(gpu, non_blocking=True)
--+                tgt_out = sent[3].cuda(gpu, non_blocking=True)
--+                
--+                src_mask, tgt_mask, src_padding_mask, tgt_padding_mask = create_mask(src, tgt_inp, pad_idx) 
--+                logits = model(src, tgt_inp, src_mask, tgt_mask, src_padding_mask, tgt_padding_mask, src_padding_mask)
--+                
--+                optimizer.zero_grad()
--+
--+                loss = loss_fn(logits.reshape(-1, logits.shape[-1]), tgt_out.reshape(-1))
--+                loss.backward()
-- 
---            optimizer.step()
---            # losses += loss.item()
--+                optimizer.step()
--+                # losses += loss.item()
--+                
--+                # wandb.log({'iter_loss': loss})
--+                epoch_loss += loss.item()
--+                torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+                
--+                if step % args.print_freq == 0:
--+                    if args.rank == 0:
--+                        stats = dict(epoch=epoch, step=step,
--+                                    loss=loss.item(),
--+                                    time=int(time.time() - start_time))
--+                        print(json.dumps(stats))
--+                        print(json.dumps(stats), file=stats_file)
--+            if args.rank == 0:
--+
--+                wandb.log({"epoch_loss":epoch_loss})
--+                # save checkpoint
--+                state = dict(epoch=epoch + 1, model=model.module.state_dict(),
--+                            optimizer=optimizer.state_dict())
--+                # print(model.state_dict)
--+                torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
--+                print('translation model saved in', args.checkpoint_dir)
--             
---#            wandb.log({'iter_loss': loss})
---            epoch_loss += loss.item()
---            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--+    ##############################################################
--+            if args.rank == 0: 
--+                if epoch%args.checkbleu ==0 : 
--+
--+                    bleu_score = checkbleu(model, tokenizer, test_loader, gpu)
--+                    wandb.log({'bleu_score': bleu_score}) 
--+    #            print(bleu_score(predicted, target))
--+    ##############################################################
--+    #        if epoch%1 ==0 : 
--+    #            torch.save(model.module.state_dict(),
--+    #                   'path.pth')
--+    #            print("Model is saved")
--+            # if args.rank == 0:
--+            #     # save checkpoint
--+            #     state = dict(epoch=epoch + 1, model=model.state_dict(),
--+            #                  optimizer=optimizer.state_dict())
--+            #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
--+            #     print('saved translation model in', args.checkpoint_dir)
--+        wandb.finish()
--             
---            if step % args.print_freq == 0:
---                if args.rank == 0:
---                    stats = dict(epoch=epoch, step=step,
---                                 loss=loss.item(),
---                                 time=int(time.time() - start_time))
---                    print(json.dumps(stats))
---                    print(json.dumps(stats), file=stats_file)
---        # wandb.log({"epoch_loss":epoch_loss})
---        if args.rank == 0:
---            # save checkpoint
---            state = dict(epoch=epoch + 1, model=model.module.state_dict(),
---                         optimizer=optimizer.state_dict())
---            # print(model.state_dict)
---            torch.save(state, args.checkpoint_dir / 'translation_checkpoint.pth')
---            print('translation model saved in', args.checkpoint_dir)
---        
---##############################################################
---        if epoch%args.checkbleu ==0 : 
--+    else: 
--+
--+        bleu_score = checkbleu(model,tokenizer, test_loader, gpu )
--+        print('test_bleu_score', bleu_score)
--+        if args.rank == 0: 
--+            wandb.log({'bleu_score': bleu_score})
--+
-- 
---            model.eval()
---            predicted=[]
---            target=[]
--+def checkbleu(model, tokenizer, test_loader, gpu): 
--+
--+    model.eval()
--+    predicted=[]
--+    target=[]
--             
---            for i in test_loader: 
---                src = i[0].cuda(gpu, non_blocking=True)
---                tgt_out = i[3].cuda(gpu, non_blocking=True)
---                num_tokens = src.shape[0]
---
---                src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
---                out = translate(model, src, tokenizer, src_mask, gpu)
---                predicted.append(out)
---                target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--+    for i in test_loader: 
--+        src = i[0].cuda(gpu, non_blocking=True)
--+        tgt_out = i[3].cuda(gpu, non_blocking=True)
--+        num_tokens = src.shape[0]
--+
--+        src_mask = (torch.zeros(num_tokens, num_tokens)).type(torch.bool).cuda(gpu, non_blocking=True)
--+        out = translate(model, src, tokenizer, src_mask, gpu)
--+        predicted.append(out)
--+        target.append([tokenizer.convert_ids_to_tokens(tgt_out)])
--                 
---                try: 
---                    bleu_score(predicted, target)
---                except: 
---                    predicted.pop()
---                    target.pop()
--+        try: 
--+            bleu_score(predicted, target)
--+        except: 
--+            predicted.pop()
--+            target.pop()
--             
---            print(bleu_score(predicted, target))
---##############################################################
---#        if epoch%1 ==0 : 
---#            torch.save(model.module.state_dict(),
---#                   'path.pth')
---#            print("Model is saved")
---        # if args.rank == 0:
---        #     # save checkpoint
---        #     state = dict(epoch=epoch + 1, model=model.state_dict(),
---        #                  optimizer=optimizer.state_dict())
---        #     torch.save(state, args.checkpoint_dir / f'translation_checkpoint.pth')
---        #     print('saved translation model in', args.checkpoint_dir)
---#    wandb.finish()
---           
--+        bleu = bleu_score(predicted, target)
-- 
--+    return bleu
-- 
-- '''
-- todo: 
--@@ -360,3 +379,4 @@ def translate(model: torch.nn.Module,
-- 
-- if __name__ == '__main__': 
--     main()
--+    wandb.finish()
--diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log
--index b8c8383..6163657 120000
----- a/wandb/debug-internal.log
--+++ b/wandb/debug-internal.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug-internal.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug-internal.log
--\ No newline at end of file
--diff --git a/wandb/debug.log b/wandb/debug.log
--index 1d77d77..7d0f5dd 120000
----- a/wandb/debug.log
--+++ b/wandb/debug.log
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8/logs/debug.log
--\ No newline at end of file
--+run-20220409_182749-paufev36/logs/debug.log
--\ No newline at end of file
--diff --git a/wandb/latest-run b/wandb/latest-run
--index ad4b017..f11d588 120000
----- a/wandb/latest-run
--+++ b/wandb/latest-run
--@@ -1 +1 @@
---run-20220406_171518-s7zesus8
--\ No newline at end of file
--+run-20220409_182749-paufev36
--\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/output.log b/wandb/run-20220409_182749-paufev36/files/output.log
-deleted file mode 100644
-index 8a30e30..0000000
---- a/wandb/run-20220409_182749-paufev36/files/output.log
-+++ /dev/null
-@@ -1,55 +0,0 @@
--
--train_translation.py --batch_size=32 --dfeedforward=1024 --epochs=32 --nhead=2 --nlayers=4
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'epochs' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'batch_size' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nhead' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'dfeedforward' was locked by 'sweep' (ignored update).
--[34m[1mwandb[39m[22m: [33mWARNING[39m Config item 'nlayers' was locked by 'sweep' (ignored update).
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
--Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
--- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
--- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
--{"epoch": 0, "step": 0, "loss": 7.115720272064209, "time": 5}
--/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py:264: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
--  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
--{"epoch": 0, "step": 5, "loss": 202.97476196289062, "time": 6}
--translation model saved in checkpoint
--{"epoch": 1, "step": 10, "loss": 151.204345703125, "time": 62}
--translation model saved in checkpoint
--{"epoch": 2, "step": 15, "loss": 76.84952545166016, "time": 83}
--translation model saved in checkpoint
--{"epoch": 3, "step": 20, "loss": 50.71405029296875, "time": 105}
--translation model saved in checkpoint
--{"epoch": 4, "step": 25, "loss": 38.18907165527344, "time": 127}
--Exception in thread Thread-3:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
--    msg = self._response_queue.get(timeout=1)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
--    res = self._recv_bytes()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
--    buf = self._recv_bytes(maxlength)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
--    buf = self._recv(4)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
--    raise EOFError
--EOFError
--Exception in thread Thread-16:
--Traceback (most recent call last):
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
--    self.run()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
--    self._target(*self._args, **self._kwargs)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/wandb_run.py", line 198, in check_status
--    status_response = self._interface.communicate_stop_status()
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 743, in communicate_stop_status
--    resp = self._communicate(req, timeout=timeout, local=True)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 545, in _communicate
--    return self._communicate_async(rec, local=local).get(timeout=timeout)
--  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 550, in _communicate_async
--    raise Exception("The wandb backend process has shutdown")
-diff --git a/wandb/run-20220409_182749-paufev36/files/requirements.txt b/wandb/run-20220409_182749-paufev36/files/requirements.txt
-deleted file mode 100644
-index 59aa056..0000000
---- a/wandb/run-20220409_182749-paufev36/files/requirements.txt
-+++ /dev/null
-@@ -1,72 +0,0 @@
--aiohttp==3.8.1
--aiosignal==1.2.0
--async-timeout==4.0.2
--asynctest==0.13.0
--attrs==21.4.0
--blessings==1.7
--brotlipy==0.7.0
--certifi==2021.10.8
--cffi==1.15.0
--charset-normalizer==2.0.12
--click==8.0.4
--configparser==5.2.0
--cryptography==36.0.0
--datasets==1.16.1
--dill==0.3.4
--docker-pycreds==0.4.0
--filelock==3.6.0
--frozenlist==1.3.0
--fsspec==2022.2.0
--gitdb==4.0.9
--gitpython==3.1.27
--gpustat==0.6.0
--huggingface-hub==0.4.0
--idna==3.3
--importlib-metadata==4.11.3
--joblib==1.1.0
--mkl-fft==1.3.1
--mkl-random==1.2.2
--mkl-service==2.4.0
--multidict==6.0.2
--multiprocess==0.70.12.2
--numpy==1.21.5
--nvidia-ml-py3==7.352.0
--packaging==21.3
--pandas==1.3.5
--pathtools==0.1.2
--pillow==9.0.1
--pip==21.2.2
--promise==2.3
--protobuf==3.19.4
--psutil==5.9.0
--pyarrow==7.0.0
--pycparser==2.21
--pyopenssl==22.0.0
--pyparsing==3.0.7
--pysocks==1.7.1
--python-dateutil==2.8.2
--pytz==2022.1
--pyyaml==6.0
--regex==2022.3.15
--requests==2.27.1
--sacremoses==0.0.49
--sentry-sdk==1.5.8
--setuptools==58.0.4
--shortuuid==1.0.8
--six==1.16.0
--smmap==5.0.0
--subprocess32==3.5.4
--tokenizers==0.10.3
--torch==1.11.0
--torchaudio==0.11.0
--torchtext==0.12.0
--torchvision==0.12.0
--tqdm==4.63.1
--transformers==4.14.1
--typing-extensions==4.1.1
--urllib3==1.26.9
--wandb==0.10.31
--wheel==0.37.1
--xxhash==3.0.0
--yarl==1.7.2
--zipp==3.7.0
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json b/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-deleted file mode 100644
-index ee6c1fa..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
-+++ /dev/null
-@@ -1,30 +0,0 @@
--{
--    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
--    "python": "3.7.11",
--    "heartbeatAt": "2022-04-09T12:57:50.039943",
--    "startedAt": "2022-04-09T12:57:49.399103",
--    "docker": null,
--    "gpu": "NVIDIA GeForce GTX 1080 Ti",
--    "gpu_count": 2,
--    "cpu_count": 8,
--    "cuda": null,
--    "args": [
--        "--batch_size=32",
--        "--dfeedforward=1024",
--        "--epochs=32",
--        "--nhead=2",
--        "--nlayers=4"
--    ],
--    "state": "running",
--    "program": "/home/ivlabs/context_enhancement/context_new/context_enhancement/train_translation.py",
--    "codePath": "train_translation.py",
--    "git": {
--        "remote": "https://github.com/IvLabs/context_enhancement.git",
--        "commit": "eed2d749c090a46bca0d3e6791485b1c252d8633"
--    },
--    "email": "aneeshashetye@gmail.com",
--    "root": "/home/ivlabs/context_enhancement/context_new/context_enhancement",
--    "host": "hubble-02",
--    "username": "ivlabs",
--    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
--}
-diff --git a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json b/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-deleted file mode 100644
-index 6be8521..0000000
---- a/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
-+++ /dev/null
-@@ -1 +0,0 @@
--{"epoch_loss": 287.689208984375, "_runtime": 137, "_timestamp": 1649509206, "_step": 5, "bleu_score": 0.0}
-\ No newline at end of file
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log b/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-deleted file mode 100644
-index ade12de..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
-+++ /dev/null
-@@ -1,141 +0,0 @@
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,431 DEBUG   MainThread:25755 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send():179] send: header
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,435 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: check_version
--2022-04-09 18:27:49,435 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: check_version
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:49,589 DEBUG   SenderThread:25755 [sender.py:send():179] send: run
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:50,037 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: run_start
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():39] meta init
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:__init__():53] meta init done
--2022-04-09 18:27:50,039 DEBUG   HandlerThread:25755 [meta.py:probe():210] probe
--2022-04-09 18:27:50,045 DEBUG   HandlerThread:25755 [meta.py:_setup_git():200] setup git
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_setup_git():207] setup git done
--2022-04-09 18:27:50,064 DEBUG   HandlerThread:25755 [meta.py:_save_code():89] save code
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_code():110] save code done
--2022-04-09 18:27:50,073 DEBUG   HandlerThread:25755 [meta.py:_save_patches():127] save patches
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_patches():169] save patches done
--2022-04-09 18:27:50,128 DEBUG   HandlerThread:25755 [meta.py:_save_pip():57] save pip
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_pip():71] save pip done
--2022-04-09 18:27:50,129 DEBUG   HandlerThread:25755 [meta.py:_save_conda():78] save conda
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:_save_conda():86] save conda done
--2022-04-09 18:27:51,517 DEBUG   HandlerThread:25755 [meta.py:probe():252] probe done
--2022-04-09 18:27:51,519 DEBUG   SenderThread:25755 [sender.py:send():179] send: files
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,530 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:27:51,530 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:51,872 DEBUG   SenderThread:25755 [sender.py:send():179] send: config
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:04,050 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:06,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:18,996 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,208 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:22,208 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:37,664 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:49,672 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:28:53,002 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:28:53,002 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:28:55,193 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,936 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:00,937 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:08,453 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:08,454 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:20,345 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:22,285 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:23,787 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:23,787 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:39,186 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:39,186 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:29:44,030 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:51,270 DEBUG   SenderThread:25755 [sender.py:send():179] send: stats
--2022-04-09 18:29:54,873 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:29:54,873 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: history
--2022-04-09 18:30:06,522 DEBUG   SenderThread:25755 [sender.py:send():179] send: summary
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:10,343 DEBUG   HandlerThread:25755 [handler.py:handle_request():124] handle_request: stop_status
--2022-04-09 18:30:10,343 DEBUG   SenderThread:25755 [sender.py:send_request():193] send_request: stop_status
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/logs/debug.log b/wandb/run-20220409_182749-paufev36/logs/debug.log
-deleted file mode 100644
-index 7b0f79c..0000000
---- a/wandb/run-20220409_182749-paufev36/logs/debug.log
-+++ /dev/null
-@@ -1,92 +0,0 @@
--2022-04-09 18:27:49,403 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting env: {'entity': 'tera_squid', 'project': 'context_enhancement', 'sweep_id': '1t9pc38r', 'root_dir': '/home/ivlabs/context_enhancement/context_new/context_enhancement', 'run_id': 'paufev36', 'sweep_param_path': '/home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/sweep-1t9pc38r/config-paufev36.yaml', 'start_method': 'thread'}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_setup.py:_flush():69] setting login settings: {}
--2022-04-09 18:27:49,404 INFO    MainThread:25755 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/logs/debug-internal.log
--2022-04-09 18:27:49,405 INFO    MainThread:25755 [wandb_init.py:init():369] calling init triggers
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():376] wandb.init called with sweep_config: {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--config: {'workers': 4, 'epochs': 32, 'batch_size': 32, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'nhead': 2, 'dfeedforward': 1024, 'nlayers': 4, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:49,406 INFO    MainThread:25755 [wandb_init.py:init():418] starting backend
--2022-04-09 18:27:49,427 INFO    MainThread:25755 [backend.py:ensure_launched():132] starting backend process...
--2022-04-09 18:27:49,429 INFO    MainThread:25755 [backend.py:ensure_launched():137] started backend process with pid: 0
--2022-04-09 18:27:49,430 INFO    wandb_internal:25755 [internal.py:wandb_internal():91] W&B internal server running at pid: 25755, started at: 2022-04-09 18:27:49.428830
--2022-04-09 18:27:49,431 INFO    MainThread:25755 [wandb_init.py:init():423] backend started and connected
--2022-04-09 18:27:49,433 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'batch_size': 32, 'dfeedforward': 1024, 'epochs': 32, 'nhead': 2, 'nlayers': 4}
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():465] updated telemetry
--2022-04-09 18:27:49,434 INFO    MainThread:25755 [wandb_init.py:init():484] communicating current version
--2022-04-09 18:27:49,435 INFO    WriterThread:25755 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:27:49,585 INFO    MainThread:25755 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
--
--2022-04-09 18:27:49,586 INFO    MainThread:25755 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:27:50,034 INFO    SenderThread:25755 [sender.py:_start_run_threads():707] run started: paufev36 with start time 1649509069
--2022-04-09 18:27:50,036 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:27:50,036 INFO    MainThread:25755 [wandb_init.py:init():522] starting run threads in backend
--2022-04-09 18:27:51,035 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:27:51,036 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
--2022-04-09 18:27:51,519 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
--2022-04-09 18:27:51,520 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file diff.patch with policy now
--2022-04-09 18:27:51,528 INFO    MainThread:25755 [wandb_run.py:_console_start():1538] atexit reg
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1449] Wrapping output streams.
--2022-04-09 18:27:51,532 INFO    MainThread:25755 [wandb_run.py:_redirect():1473] Redirects installed.
--2022-04-09 18:27:51,533 INFO    MainThread:25755 [wandb_init.py:init():547] run started, returning control to user process
--2022-04-09 18:27:51,534 INFO    MainThread:25755 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'dmodel': 768, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
--2022-04-09 18:27:52,045 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:52,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json
--2022-04-09 18:27:52,686 INFO    Thread-14 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3213fqcg-wandb-metadata.json
--2022-04-09 18:27:52,691 INFO    Thread-15 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/3tltefpg-code/train_translation.py
--2022-04-09 18:27:53,694 INFO    Thread-18 :25755 [upload_job.py:push():133] Uploaded file /tmp/tmpzveu7e54wandb/g47w6xsn-diff.patch
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:54,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:27:56,046 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:27:58,047 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:04,051 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:28:04,051 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:06,055 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:22,059 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:28:55,194 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:28:56,070 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:00,938 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:01,087 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:02,088 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:18,092 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:22,287 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:23,093 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:24,094 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:40,099 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:29:44,031 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:29:44,131 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:02,136 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:06,523 INFO    SenderThread:25755 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
--2022-04-09 18:30:07,138 INFO    Thread-11 :25755 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:15,029 WARNING wandb_internal:25755 [internal.py:is_dead():367] Internal process exiting, parent pid 25740 disappeared
--2022-04-09 18:30:15,030 ERROR   wandb_internal:25755 [internal.py:wandb_internal():143] Internal process shutdown.
--2022-04-09 18:30:15,350 INFO    HandlerThread:25755 [handler.py:finish():638] shutting down handler
--2022-04-09 18:30:15,527 INFO    WriterThread:25755 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [sender.py:finish():933] shutting down sender
--2022-04-09 18:30:15,678 INFO    SenderThread:25755 [dir_watcher.py:finish():282] shutting down directory watcher
--2022-04-09 18:30:16,139 INFO    SenderThread:25755 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt requirements.txt
--2022-04-09 18:30:16,140 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-metadata.json wandb-metadata.json
--2022-04-09 18:30:16,142 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log output.log
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml conda-environment.yaml
--2022-04-09 18:30:16,143 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json wandb-summary.json
--2022-04-09 18:30:16,145 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml config.yaml
--2022-04-09 18:30:16,150 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/diff.patch diff.patch
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/code/train_translation.py code/train_translation.py
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:finish():176] shutting down file pusher
--2022-04-09 18:30:16,152 INFO    SenderThread:25755 [file_pusher.py:join():181] waiting for file pusher
--2022-04-09 18:30:17,012 INFO    Thread-30 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/output.log
--2022-04-09 18:30:17,026 INFO    Thread-32 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/wandb-summary.json
--2022-04-09 18:30:17,131 INFO    Thread-33 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/config.yaml
--2022-04-09 18:30:17,133 INFO    Thread-29 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/requirements.txt
--2022-04-09 18:30:17,424 INFO    Thread-31 :25755 [upload_job.py:push():133] Uploaded file /home/ivlabs/context_enhancement/context_new/context_enhancement/wandb/run-20220409_182749-paufev36/files/conda-environment.yaml
-diff --git a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb b/wandb/run-20220409_182749-paufev36/run-paufev36.wandb
-deleted file mode 100644
-index 70babdb..0000000
-Binary files a/wandb/run-20220409_182749-paufev36/run-paufev36.wandb and /dev/null differ
-diff --git a/wandb/sweep-1t9pc38r/config-paufev36.yaml b/wandb/sweep-1t9pc38r/config-paufev36.yaml
-deleted file mode 100644
-index da3e8b2..0000000
---- a/wandb/sweep-1t9pc38r/config-paufev36.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml b/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-deleted file mode 100644
-index d68afea..0000000
---- a/wandb/sweep-1t9pc38r/config-vjrenr4z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml b/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-deleted file mode 100644
-index cc3235e..0000000
---- a/wandb/sweep-1t9pc38r/config-z44hpswp.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml b/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-deleted file mode 100644
-index 24fc0f6..0000000
---- a/wandb/sweep-7o7qjhjd/config-2o0jaujh.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 1024
--epochs:
--  value: 24
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml b/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-deleted file mode 100644
-index eeb3936..0000000
---- a/wandb/sweep-7o7qjhjd/config-hie2vfqk.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml b/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-deleted file mode 100644
-index f88591e..0000000
---- a/wandb/sweep-7o7qjhjd/config-lfenfbqz.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-abict4v2.yaml b/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-deleted file mode 100644
-index 1b97c5e..0000000
---- a/wandb/sweep-lrpyor0l/config-abict4v2.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 20
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml b/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-deleted file mode 100644
-index 426c8ac..0000000
---- a/wandb/sweep-lrpyor0l/config-ba0yl54z.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml b/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-deleted file mode 100644
-index caf5f78..0000000
---- a/wandb/sweep-lrpyor0l/config-d3rkwo1k.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml b/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-deleted file mode 100644
-index 6b7d3c1..0000000
---- a/wandb/sweep-lrpyor0l/config-fjhaj183.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml b/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-deleted file mode 100644
-index 8f11b7e..0000000
---- a/wandb/sweep-lrpyor0l/config-fjlzyv53.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml b/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-deleted file mode 100644
-index d3a2560..0000000
---- a/wandb/sweep-lrpyor0l/config-orkb33ld.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 32
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml b/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-deleted file mode 100644
-index 403014d..0000000
---- a/wandb/sweep-q27ijx1y/config-dg43ixc4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 512
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml b/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-deleted file mode 100644
-index d1bf3d8..0000000
---- a/wandb/sweep-q27ijx1y/config-fwwd5rya.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 40
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml b/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-deleted file mode 100644
-index 258ae0c..0000000
---- a/wandb/sweep-yoroy32u/config-2dzyn8ls.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 6
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml b/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-deleted file mode 100644
-index dbe827a..0000000
---- a/wandb/sweep-yoroy32u/config-7a0i8c1o.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--nhead:
--  value: 8
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml b/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-deleted file mode 100644
-index 3aeb285..0000000
---- a/wandb/sweep-yoroy32u/config-7wn11wz9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 512
--epochs:
--  value: 40
--nhead:
--  value: 4
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml b/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-deleted file mode 100644
-index ccb6734..0000000
---- a/wandb/sweep-yoroy32u/config-aqxf4pp9.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 32
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yoroy32u/config-gjih072d.yaml b/wandb/sweep-yoroy32u/config-gjih072d.yaml
-deleted file mode 100644
-index 73e8e4c..0000000
---- a/wandb/sweep-yoroy32u/config-gjih072d.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml b/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-deleted file mode 100644
-index 9d822c0..0000000
---- a/wandb/sweep-yoroy32u/config-poi9dsbs.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml b/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-deleted file mode 100644
-index f0bd5df..0000000
---- a/wandb/sweep-yoroy32u/config-th5i0wo4.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 64
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yoroy32u/config-uh7twoim.yaml b/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-deleted file mode 100644
-index 508d9e2..0000000
---- a/wandb/sweep-yoroy32u/config-uh7twoim.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 256
--epochs:
--  value: 20
--nhead:
--  value: 6
--nlayers:
--  value: 2
-diff --git a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml b/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-deleted file mode 100644
-index 83311a7..0000000
---- a/wandb/sweep-yoroy32u/config-zf5ccuzv.yaml
-+++ /dev/null
-@@ -1,12 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 1024
--epochs:
--  value: 16
--nhead:
--  value: 2
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml b/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-deleted file mode 100644
-index 4f6dc35..0000000
---- a/wandb/sweep-yvfclyxy/config-luzuebmc.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 256
--dfeedforward:
--  value: 256
--epochs:
--  value: 36
--lambd:
--  value: 0.4
--nhead:
--  value: 4
--nlayers:
--  value: 6
-diff --git a/wandb/sweep-yvfclyxy/config-padai7jf.yaml b/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-deleted file mode 100644
-index 9b19315..0000000
---- a/wandb/sweep-yvfclyxy/config-padai7jf.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 16
--dfeedforward:
--  value: 1024
--epochs:
--  value: 28
--lambd:
--  value: 0.55
--nhead:
--  value: 8
--nlayers:
--  value: 4
-diff --git a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml b/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-deleted file mode 100644
-index 8a8a9b2..0000000
---- a/wandb/sweep-yvfclyxy/config-r4bjt76k.yaml
-+++ /dev/null
-@@ -1,14 +0,0 @@
--wandb_version: 1
--
--batch_size:
--  value: 128
--dfeedforward:
--  value: 256
--epochs:
--  value: 24
--lambd:
--  value: 0.2
--nhead:
--  value: 2
--nlayers:
--  value: 4
diff --git a/wandb/run-20220416_014323-1a0lobwa/files/output.log b/wandb/run-20220416_014323-1a0lobwa/files/output.log
deleted file mode 100644
index 94424a5..0000000
--- a/wandb/run-20220416_014323-1a0lobwa/files/output.log
+++ /dev/null
@@ -1,106 +0,0 @@
-
-train_translation.py --load=0
-Reusing dataset opus_rf (/home/ivlabs/.cache/huggingface/datasets/opus_rf/de-en/1.0.0/3725eb23f8df679ddd37d8d65a6bbfcda7732c66edccbc62a3c3b1354c934c9f)
-Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
-- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
-- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
-{"epoch": 0, "step": 0, "loss": 7.128603458404541, "time": 9}
-/home/ivlabs/context_enhancement/context_new/new/context_enhancement/train_translation.py:275: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
-  torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
-{"epoch": 0, "step": 5, "loss": 156.04449462890625, "time": 39}
-{"epoch": 0, "step": 10, "loss": 154.7353515625, "time": 67}
-translation model saved in checkpoint
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['the', 'level', 'of', 'employment', 'in', 'this', 'country', 'is', 'high', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['on', 'the', 'th', '##res', '##hold', 'of', 'the', 'nine', '##ties', ',', 'we', 'should', 'con', '##fir', '##m', 'and', 'strength', '##en', 'in', 'the', 'long', 'term', 'what', 'must', 'be', 'the', 'basis', 'for', 'future', 'developments', 'too', ':', 'the', 'economy', ',', 'full', 'employment', 'and', 'welfare', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['sweden', 'is', 'a', 'good', 'country', 'for', 'enterprise', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['proposals', 'will', 'be', 'put', 'forward', 'for', 'increasing', 'competition', 'and', 'keeping', 'down', 'costs', 'in', 'areas', 'where', 'price', 'trends', 'are', 'boost', '##ing', 'inflation', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['later', 'in', 'the', 'electoral', 'period', ',', 'proposals', 'will', 'be', 'put', 'forward', 'for', 'an', 'extensive', 'reform', 'of', 'the', 'tax', 'system', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['2', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['3', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['opportunities', 'for', 'study', ',', 'work', 'and', 'cultural', 'exchange', '##s', 'across', 'national', 'boundaries', 'will', 'be', 'extended', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['sweden', "'", 's', 'economic', 'situation', 'has', 'improved', 'substantial', '##ly', 'in', 'recent', 'years', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['consideration', 'for', 'the', 'environment', 'and', 'the', 'countryside', 'must', 'character', '##ize', 'developments', 'in', 'all', 'fields', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['the', 'policies', 'we', 'pursue', 'in', 'the', 'next', 'three', 'years', 'will', 'leave', 'their', 'mark', 'on', 'developments', 'in', 'this', 'country', 'during', 'the', 'decade', 'to', 'come', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['in', 'this', 'context', ',', 'the', 'requirements', 'of', 'full', 'employment', ',', 'welfare', ',', 'a', 'good', 'working', 'environment', 'and', 'trade', 'union', 'participation', 'will', 'be', 'key', 'issues', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['a', 'reduction', 'of', 'the', 'marginal', 'income', '-', 'tax', 'rate', 'in', '1989', 'by', '3', 'percentage', 'points', 'will', 'be', 'proposed', 'in', 'the', 'first', 'place', 'for', 'full', '-', 'time', 'employees', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['the', 'government', "'", 's', 'agricultural', 'policy', 'aims', 'to', 'promote', 'farming', 'that', 'th', '##rive', '##s', 'without', 'having', 'dama', '##ging', 'effects', 'on', 'the', 'environment', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['staff', 'rec', '##ruit', '##ment', 'within', 'the', 'car', '##ing', 'services', 'must', 'be', 'improved', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['the', 'aim', 'is', 'to', 'sti', '##mula', '##te', 'work', 'and', 'saving', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['preliminary', 'inspection', 'of', 'new', 'chemical', 'substances', 'will', 'be', 'introduced', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['measures', 'will', 'be', 'taken', 'to', 'protect', 'the', 'visual', 'amen', '##ity', 'of', 'the', 'open', 'landscape', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['4', '.', 'security', 'and', 'responsibility', 'will', 'character', '##ize', 'society', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['it', 'will', 'be', 'pursued', 'with', 'firm', '##ness', 'and', 'consiste', '##ncy', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['international', 'co', '-', 'operation', 'within', 'research', 'and', 'development', 'is', 'becoming', 'increasingly', 'important', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['recu', '##rren', '##t', 'training', 'in', 'working', 'life', 'and', 'qualified', 'further', 'education', 'will', 'play', 'an', 'important', 'role', 'in', 'this', 'context', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['during', 'this', 'electoral', 'period', ',', 'legislation', 'on', 'a', 'sixth', 'week', 'of', 'annual', 'holiday', 'with', 'pay', 'will', 'be', 'ena', '##cted', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['industrial', 'products', 'and', 'processes', 'are', 'to', 'be', 'clean', '##er', 'through', 'string', '##ent', 'requirements', 'and', 'rapid', 'adaptation', 'to', 'new', 'technology', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['the', 'government', 'will', 'put', 'forward', 'proposals', 'for', 'developing', 'police', 'work', 'and', 'making', 'it', 'more', 'effective', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['during', 'the', 'coming', 'electoral', 'period', ',', 'sek', '300', 'million', 'will', 'be', 'ear', '##mark', '##ed', 'for', 'the', 'rene', '##wal', 'and', 'development', 'of', 'cultural', 'life', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['the', 'government', 'inte', '##nds', 'to', 'pursue', 'a', 'food', 'policy', 'such', 'that', 'the', 'price', 'trend', 'is', 'check', '##ed', 'and', 'the', 'consumers', 'offered', 'food', 'at', 'reason', '##able', 'prices', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['resources', 'will', 'be', 'set', 'free', 'for', 'the', 'provision', 'of', 'housing', 'by', 'limit', '##ing', 'other', 'construction', 'projects', 'in', 'over', '##hea', '##ted', 'regions', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['sweden', 'has', 'participated', 'in', 'practical', '##ly', 'all', 'the', 'united', 'nations', 'operations', 'of', 'this', 'kind', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['immigrants', "'", 'entry', 'into', 'the', 'labour', 'market', 'should', 'be', 'facilitate', '##d', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['special', 'measures', 'will', 'be', 'applied', 'in', 'regions', 'particularly', 'exposed', ',', 'for', 'example', 'west', 'skane', 'and', 'the', 'sund', '##s', '##vall', '/', 'tim', '##ra', 'area', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['sweden', "'", 's', 'commitment', 'and', 'responsibility', 'does', 'not', 'end', 'at', 'europe', "'", 's', 'borders', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['places', 'in', 'the', 'upper', 'secondary', 'school', 'will', 'be', 'available', 'to', 'all', 'young', 'people', 'under', 'the', 'age', 'of', '20', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['a', 'proposal', 'for', 'a', 'lower', 'legal', 'limit', 'for', 'the', 'offen', '##ce', 'of', 'driving', 'with', 'ability', 'imp', '##aire', '##d', 'by', 'alcohol', 'will', 'be', 'submitted', 'to', 'parliament', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['development', 'assistance', 'shall', 'furthermore', 'promote', 'a', 'sustainable', 'use', 'of', 'natural', 'resources', 'and', 'protection', 'of', 'the', 'environment', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['there', 'is', 'broad', 'political', 'consensus', 'and', 'support', 'for', 'tac', '##kling', 'the', 'environmental', 'problems', 'force', '##fully', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['proposals', 'for', 'pollution', 'charges', 'for', 'other', 'substances', ',', 'among', 'them', 'carbon', 'dio', '##xide', ',', 'will', 'be', 'presented', 'during', 'this', 'term', 'of', 'office', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['a', 'world', 'in', 'deep', 'economic', 'and', 'social', 'im', '##bala', '##nce', 'will', 'never', 'be', 'safe', '.', '[SEP]']
-out ['s', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the']
-predicted ['i', 'should', 'like', 'to', 'welcome', 'the', 'members', 'of', 'the', 'environment', 'party', 'the', 'green', '##s', 'to', 'what', 'i', 'hope', 'will', 'be', 'ins', '##pi', '##ring', 'parliamentary', 'work', '.', '[SEP]']
-Exception in thread Thread-3:
-Traceback (most recent call last):
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 926, in _bootstrap_inner
-    self.run()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/threading.py", line 870, in run
-    self._target(*self._args, **self._kwargs)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/site-packages/wandb/sdk/interface/interface.py", line 114, in message_loop
-    msg = self._response_queue.get(timeout=1)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/queues.py", line 108, in get
-    res = self._recv_bytes()
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
-    buf = self._recv_bytes(maxlength)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
-    buf = self._recv(4)
-  File "/home/ivlabs/miniconda3/envs/ectc/lib/python3.7/multiprocessing/connection.py", line 383, in _recv
-    raise EOFError
diff --git a/wandb/run-20220416_014323-1a0lobwa/files/requirements.txt b/wandb/run-20220416_014323-1a0lobwa/files/requirements.txt
deleted file mode 100644
index 5ddce70..0000000
--- a/wandb/run-20220416_014323-1a0lobwa/files/requirements.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-aiohttp==3.8.1
-aiosignal==1.2.0
-antlr4-python3-runtime==4.8
-async-timeout==4.0.2
-asynctest==0.13.0
-attrs==21.4.0
-backcall==0.2.0
-bitarray==2.4.1
-blessings==1.7
-brotlipy==0.7.0
-certifi==2021.10.8
-cffi==1.15.0
-charset-normalizer==2.0.12
-click==8.0.4
-colorama==0.4.4
-configparser==5.2.0
-cryptography==36.0.0
-cython==0.29.28
-datasets==1.16.1
-debugpy==1.6.0
-decorator==5.1.1
-dill==0.3.4
-docker-pycreds==0.4.0
-entrypoints==0.4
-fairseq==1.0.0a0
-fastbpe==0.1.0
-filelock==3.6.0
-frozenlist==1.3.0
-fsspec==2022.2.0
-gitdb==4.0.9
-gitpython==3.1.27
-gpustat==0.6.0
-huggingface-hub==0.4.0
-hydra-core==1.0.7
-idna==3.3
-importlib-metadata==4.11.3
-importlib-resources==5.6.0
-ipykernel==6.12.1
-ipython==7.32.0
-jedi==0.18.1
-joblib==1.1.0
-jupyter-client==7.2.2
-jupyter-core==4.9.2
-matplotlib-inline==0.1.3
-mkl-fft==1.3.1
-mkl-random==1.2.2
-mkl-service==2.4.0
-mock==4.0.3
-multidict==6.0.2
-multiprocess==0.70.12.2
-nest-asyncio==1.5.5
-numpy==1.21.5
-nvidia-ml-py3==7.352.0
-omegaconf==2.0.6
-packaging==21.3
-pandas==1.3.5
-parso==0.8.3
-pathtools==0.1.2
-pexpect==4.8.0
-pickleshare==0.7.5
-pillow==9.0.1
-pip==21.2.2
-portalocker==2.4.0
-promise==2.3
-prompt-toolkit==3.0.29
-protobuf==3.19.4
-psutil==5.9.0
-ptyprocess==0.7.0
-pyarrow==7.0.0
-pycparser==2.21
-pygments==2.11.2
-pyopenssl==22.0.0
-pyparsing==3.0.7
-pysocks==1.7.1
-python-dateutil==2.8.2
-pytz==2022.1
-pyyaml==6.0
-pyzmq==22.3.0
-regex==2022.3.15
-requests==2.27.1
-sacrebleu==2.0.0
-sacremoses==0.0.49
-sentry-sdk==1.5.8
-setuptools==58.0.4
-shortuuid==1.0.8
-six==1.16.0
-smmap==5.0.0
-subprocess32==3.5.4
-subword-nmt==0.3.8
-tabulate==0.8.9
-tokenizers==0.10.3
-torch==1.11.0
-torchaudio==0.11.0
-torchtext==0.12.0
-torchvision==0.12.0
-tornado==6.1
-tqdm==4.63.1
-traitlets==5.1.1
-transformers==4.14.1
-typing-extensions==4.1.1
-urllib3==1.26.9
-wandb==0.10.31
-wcwidth==0.2.5
-wheel==0.37.1
-xxhash==3.0.0
-yarl==1.7.2
-zipp==3.7.0
\ No newline at end of file
diff --git a/wandb/run-20220416_014323-1a0lobwa/files/wandb-metadata.json b/wandb/run-20220416_014323-1a0lobwa/files/wandb-metadata.json
deleted file mode 100644
index df71503..0000000
--- a/wandb/run-20220416_014323-1a0lobwa/files/wandb-metadata.json
+++ /dev/null
@@ -1,26 +0,0 @@
-{
-    "os": "Linux-5.13.0-39-generic-x86_64-with-debian-bullseye-sid",
-    "python": "3.7.11",
-    "heartbeatAt": "2022-04-15T20:13:24.853414",
-    "startedAt": "2022-04-15T20:13:23.783007",
-    "docker": null,
-    "gpu": "NVIDIA GeForce GTX 1080 Ti",
-    "gpu_count": 2,
-    "cpu_count": 8,
-    "cuda": null,
-    "args": [
-        "--load=0"
-    ],
-    "state": "running",
-    "program": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement/train_translation.py",
-    "codePath": "train_translation.py",
-    "git": {
-        "remote": "https://github.com/IvLabs/context_enhancement.git",
-        "commit": "3f7c03274d50f816db3079adcb4d4125620373b6"
-    },
-    "email": "aneeshashetye@gmail.com",
-    "root": "/home/ivlabs/context_enhancement/context_new/new/context_enhancement",
-    "host": "hubble-02",
-    "username": "ivlabs",
-    "executable": "/home/ivlabs/miniconda3/envs/ectc/bin/python"
-}
diff --git a/wandb/run-20220416_014323-1a0lobwa/files/wandb-summary.json b/wandb/run-20220416_014323-1a0lobwa/files/wandb-summary.json
deleted file mode 100644
index e0c4e63..0000000
--- a/wandb/run-20220416_014323-1a0lobwa/files/wandb-summary.json
+++ /dev/null
@@ -1 +0,0 @@
-{"epoch_loss": 137.94474399089813, "_runtime": 83, "_timestamp": 1650053686, "_step": 0}
\ No newline at end of file
diff --git a/wandb/run-20220416_014323-1a0lobwa/logs/debug-internal.log b/wandb/run-20220416_014323-1a0lobwa/logs/debug-internal.log
deleted file mode 100644
index 1294372..0000000
--- a/wandb/run-20220416_014323-1a0lobwa/logs/debug-internal.log
+++ /dev/null
@@ -1,117 +0,0 @@
-2022-04-16 01:43:23,789 INFO    MainThread:6896 [backend.py:ensure_launched():137] started backend process with pid: 0
-2022-04-16 01:43:23,790 INFO    wandb_internal:6896 [internal.py:wandb_internal():91] W&B internal server running at pid: 6896, started at: 2022-04-16 01:43:23.789717
-2022-04-16 01:43:23,791 INFO    MainThread:6896 [wandb_init.py:init():423] backend started and connected
-2022-04-16 01:43:23,791 DEBUG   MainThread:6896 [config_util.py:dict_from_config_file():101] no default config file found in config-defaults.yaml
-2022-04-16 01:43:23,792 INFO    MainThread:6896 [wandb_init.py:init():465] updated telemetry
-2022-04-16 01:43:23,793 INFO    MainThread:6896 [wandb_init.py:init():484] communicating current version
-2022-04-16 01:43:23,795 DEBUG   HandlerThread:6896 [handler.py:handle_request():124] handle_request: check_version
-2022-04-16 01:43:23,793 INFO    WriterThread:6896 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/run-1a0lobwa.wandb
-2022-04-16 01:43:23,796 DEBUG   SenderThread:6896 [sender.py:send():179] send: header
-2022-04-16 01:43:23,796 DEBUG   SenderThread:6896 [sender.py:send_request():193] send_request: check_version
-2022-04-16 01:43:24,121 INFO    MainThread:6896 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-16 01:43:24,121 INFO    MainThread:6896 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-16 01:43:24,122 DEBUG   SenderThread:6896 [sender.py:send():179] send: run
-2022-04-16 01:43:24,850 INFO    MainThread:6896 [wandb_init.py:init():522] starting run threads in backend
-2022-04-16 01:43:24,850 DEBUG   HandlerThread:6896 [handler.py:handle_request():124] handle_request: run_start
-2022-04-16 01:43:24,851 INFO    SenderThread:6896 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files
-2022-04-16 01:43:24,851 INFO    SenderThread:6896 [sender.py:_start_run_threads():707] run started: 1a0lobwa with start time 1650053603
-2022-04-16 01:43:24,851 DEBUG   SenderThread:6896 [sender.py:send():179] send: summary
-2022-04-16 01:43:24,851 INFO    SenderThread:6896 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:43:24,853 DEBUG   HandlerThread:6896 [meta.py:__init__():39] meta init
-2022-04-16 01:43:24,853 DEBUG   HandlerThread:6896 [meta.py:__init__():53] meta init done
-2022-04-16 01:43:24,853 DEBUG   HandlerThread:6896 [meta.py:probe():210] probe
-2022-04-16 01:43:24,859 DEBUG   HandlerThread:6896 [meta.py:_setup_git():200] setup git
-2022-04-16 01:43:24,876 DEBUG   HandlerThread:6896 [meta.py:_setup_git():207] setup git done
-2022-04-16 01:43:24,876 DEBUG   HandlerThread:6896 [meta.py:_save_code():89] save code
-2022-04-16 01:43:24,886 DEBUG   HandlerThread:6896 [meta.py:_save_code():110] save code done
-2022-04-16 01:43:24,886 DEBUG   HandlerThread:6896 [meta.py:_save_patches():127] save patches
-2022-04-16 01:43:24,961 DEBUG   HandlerThread:6896 [meta.py:_save_patches():169] save patches done
-2022-04-16 01:43:24,961 DEBUG   HandlerThread:6896 [meta.py:_save_pip():57] save pip
-2022-04-16 01:43:24,961 DEBUG   HandlerThread:6896 [meta.py:_save_pip():71] save pip done
-2022-04-16 01:43:24,961 DEBUG   HandlerThread:6896 [meta.py:_save_conda():78] save conda
-2022-04-16 01:43:25,853 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/code/train_translation.py
-2022-04-16 01:43:25,854 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/conda-environment.yaml
-2022-04-16 01:43:25,854 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/requirements.txt
-2022-04-16 01:43:25,854 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/wandb-summary.json
-2022-04-16 01:43:25,854 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/diff.patch
-2022-04-16 01:43:25,855 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/code
-2022-04-16 01:43:26,705 DEBUG   HandlerThread:6896 [meta.py:_save_conda():86] save conda done
-2022-04-16 01:43:26,705 DEBUG   HandlerThread:6896 [meta.py:probe():252] probe done
-2022-04-16 01:43:26,708 DEBUG   SenderThread:6896 [sender.py:send():179] send: files
-2022-04-16 01:43:26,708 INFO    SenderThread:6896 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-16 01:43:26,709 INFO    SenderThread:6896 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-16 01:43:26,710 INFO    SenderThread:6896 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-16 01:43:26,718 INFO    MainThread:6896 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-16 01:43:26,719 DEBUG   HandlerThread:6896 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:43:26,719 DEBUG   SenderThread:6896 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:43:26,719 INFO    MainThread:6896 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-16 01:43:26,721 INFO    MainThread:6896 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-16 01:43:26,721 INFO    MainThread:6896 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-16 01:43:26,721 INFO    MainThread:6896 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-16 01:43:26,722 INFO    MainThread:6896 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 10, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-16 01:43:26,853 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/conda-environment.yaml
-2022-04-16 01:43:26,853 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/wandb-metadata.json
-2022-04-16 01:43:26,853 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:43:27,375 DEBUG   SenderThread:6896 [sender.py:send():179] send: config
-2022-04-16 01:43:28,355 INFO    Thread-15 :6896 [upload_job.py:push():133] Uploaded file /tmp/tmpihia6f2xwandb/2u1coito-code/train_translation.py
-2022-04-16 01:43:28,852 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:43:28,939 INFO    Thread-14 :6896 [upload_job.py:push():133] Uploaded file /tmp/tmpihia6f2xwandb/2kqba8ii-wandb-metadata.json
-2022-04-16 01:43:29,213 INFO    Thread-22 :6896 [upload_job.py:push():133] Uploaded file /tmp/tmpihia6f2xwandb/26d72ylc-diff.patch
-2022-04-16 01:43:29,853 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/config.yaml
-2022-04-16 01:43:30,853 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:43:32,881 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:43:42,376 DEBUG   HandlerThread:6896 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:43:42,376 DEBUG   SenderThread:6896 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:43:44,886 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:43:53,405 DEBUG   SenderThread:6896 [sender.py:send():179] send: stats
-2022-04-16 01:43:58,051 DEBUG   HandlerThread:6896 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:43:58,052 DEBUG   SenderThread:6896 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:44:12,895 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:44:13,751 DEBUG   HandlerThread:6896 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:44:13,751 DEBUG   SenderThread:6896 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:44:23,825 DEBUG   SenderThread:6896 [sender.py:send():179] send: stats
-2022-04-16 01:44:29,521 DEBUG   HandlerThread:6896 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:44:29,521 DEBUG   SenderThread:6896 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:44:42,905 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:44:45,209 DEBUG   HandlerThread:6896 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:44:45,210 DEBUG   SenderThread:6896 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:44:46,692 DEBUG   SenderThread:6896 [sender.py:send():179] send: history
-2022-04-16 01:44:46,692 DEBUG   SenderThread:6896 [sender.py:send():179] send: summary
-2022-04-16 01:44:46,692 INFO    SenderThread:6896 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:44:46,909 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/wandb-summary.json
-2022-04-16 01:44:54,512 DEBUG   SenderThread:6896 [sender.py:send():179] send: stats
-2022-04-16 01:45:00,912 DEBUG   HandlerThread:6896 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:45:00,912 DEBUG   SenderThread:6896 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:45:00,917 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:04,918 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:06,919 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:08,920 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:10,921 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:12,921 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:14,922 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:16,688 DEBUG   HandlerThread:6896 [handler.py:handle_request():124] handle_request: stop_status
-2022-04-16 01:45:16,688 DEBUG   SenderThread:6896 [sender.py:send_request():193] send_request: stop_status
-2022-04-16 01:45:16,926 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:18,927 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:20,928 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:22,928 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:24,929 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:25,143 DEBUG   SenderThread:6896 [sender.py:send():179] send: stats
-2022-04-16 01:45:26,144 INFO    SenderThread:6896 [sender.py:finish():933] shutting down sender
-2022-04-16 01:45:26,144 INFO    WriterThread:6896 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/run-1a0lobwa.wandb
-2022-04-16 01:45:26,144 INFO    SenderThread:6896 [dir_watcher.py:finish():282] shutting down directory watcher
-2022-04-16 01:45:26,891 INFO    MainThread:6896 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
-2022-04-16 01:45:26,892 INFO    MainThread:6896 [wandb_run.py:_restore():1480] restore
-2022-04-16 01:45:26,930 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:26,930 INFO    SenderThread:6896 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files
-2022-04-16 01:45:26,931 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/requirements.txt requirements.txt
-2022-04-16 01:45:26,931 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/wandb-metadata.json wandb-metadata.json
-2022-04-16 01:45:26,931 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log output.log
-2022-04-16 01:45:26,934 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/conda-environment.yaml conda-environment.yaml
-2022-04-16 01:45:26,938 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/wandb-summary.json wandb-summary.json
-2022-04-16 01:45:26,941 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/config.yaml config.yaml
-2022-04-16 01:45:26,941 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/diff.patch diff.patch
-2022-04-16 01:45:26,949 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/code/train_translation.py code/train_translation.py
-2022-04-16 01:45:26,949 INFO    SenderThread:6896 [file_pusher.py:finish():176] shutting down file pusher
-2022-04-16 01:45:26,950 INFO    SenderThread:6896 [file_pusher.py:join():181] waiting for file pusher
diff --git a/wandb/run-20220416_014323-1a0lobwa/logs/debug.log b/wandb/run-20220416_014323-1a0lobwa/logs/debug.log
deleted file mode 100644
index 4a5d442..0000000
--- a/wandb/run-20220416_014323-1a0lobwa/logs/debug.log
+++ /dev/null
@@ -1,81 +0,0 @@
-2022-04-16 01:43:23,784 INFO    MainThread:6896 [wandb_setup.py:_flush():69] setting env: {'start_method': 'thread'}
-2022-04-16 01:43:23,784 INFO    MainThread:6896 [wandb_setup.py:_flush():69] setting login settings: {}
-2022-04-16 01:43:23,784 INFO    MainThread:6896 [wandb_init.py:_log_setup():336] Logging user logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/logs/debug.log
-2022-04-16 01:43:23,784 INFO    MainThread:6896 [wandb_init.py:_log_setup():337] Logging internal logs to /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/logs/debug-internal.log
-2022-04-16 01:43:23,784 INFO    MainThread:6896 [wandb_init.py:init():369] calling init triggers
-2022-04-16 01:43:23,784 INFO    MainThread:6896 [wandb_init.py:init():376] wandb.init called with sweep_config: {}
-config: {'workers': 4, 'epochs': 10, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': (0.9, 0.98), 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': PosixPath('checkpoint'), 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-16 01:43:23,784 INFO    MainThread:6896 [wandb_init.py:init():418] starting backend
-2022-04-16 01:43:23,789 INFO    MainThread:6896 [backend.py:ensure_launched():132] starting backend process...
-2022-04-16 01:43:23,789 INFO    MainThread:6896 [backend.py:ensure_launched():137] started backend process with pid: 0
-2022-04-16 01:43:23,790 INFO    wandb_internal:6896 [internal.py:wandb_internal():91] W&B internal server running at pid: 6896, started at: 2022-04-16 01:43:23.789717
-2022-04-16 01:43:23,791 INFO    MainThread:6896 [wandb_init.py:init():423] backend started and connected
-2022-04-16 01:43:23,792 INFO    MainThread:6896 [wandb_init.py:init():465] updated telemetry
-2022-04-16 01:43:23,793 INFO    MainThread:6896 [wandb_init.py:init():484] communicating current version
-2022-04-16 01:43:23,793 INFO    WriterThread:6896 [datastore.py:open_for_write():77] open: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/run-1a0lobwa.wandb
-2022-04-16 01:43:24,121 INFO    MainThread:6896 [wandb_init.py:init():489] got version response upgrade_message: "wandb version 0.12.14 is available!  To upgrade, please run:\n $ pip install wandb --upgrade"
-
-2022-04-16 01:43:24,121 INFO    MainThread:6896 [wandb_init.py:init():497] communicating run to backend with 30 second timeout
-2022-04-16 01:43:24,850 INFO    MainThread:6896 [wandb_init.py:init():522] starting run threads in backend
-2022-04-16 01:43:24,851 INFO    SenderThread:6896 [dir_watcher.py:__init__():168] watching files in: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files
-2022-04-16 01:43:24,851 INFO    SenderThread:6896 [sender.py:_start_run_threads():707] run started: 1a0lobwa with start time 1650053603
-2022-04-16 01:43:24,851 INFO    SenderThread:6896 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:43:25,853 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/code/train_translation.py
-2022-04-16 01:43:25,854 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/conda-environment.yaml
-2022-04-16 01:43:25,854 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/requirements.txt
-2022-04-16 01:43:25,854 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/wandb-summary.json
-2022-04-16 01:43:25,854 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/diff.patch
-2022-04-16 01:43:25,855 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/code
-2022-04-16 01:43:26,708 INFO    SenderThread:6896 [sender.py:_save_file():829] saving file wandb-metadata.json with policy now
-2022-04-16 01:43:26,709 INFO    SenderThread:6896 [sender.py:_save_file():829] saving file code/train_translation.py with policy now
-2022-04-16 01:43:26,710 INFO    SenderThread:6896 [sender.py:_save_file():829] saving file diff.patch with policy now
-2022-04-16 01:43:26,718 INFO    MainThread:6896 [wandb_run.py:_console_start():1538] atexit reg
-2022-04-16 01:43:26,719 INFO    MainThread:6896 [wandb_run.py:_redirect():1412] redirect: SettingsConsole.WRAP
-2022-04-16 01:43:26,721 INFO    MainThread:6896 [wandb_run.py:_redirect():1449] Wrapping output streams.
-2022-04-16 01:43:26,721 INFO    MainThread:6896 [wandb_run.py:_redirect():1473] Redirects installed.
-2022-04-16 01:43:26,721 INFO    MainThread:6896 [wandb_init.py:init():547] run started, returning control to user process
-2022-04-16 01:43:26,722 INFO    MainThread:6896 [wandb_run.py:_config_callback():787] config_cb None None {'workers': 4, 'epochs': 10, 'batch_size': 16, 'learning_rate': 0.2, 'dropout': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'clip': 1, 'betas': [0.9, 0.98], 'eps': 1e-09, 'loss_fn': 'cross_entropy', 'optimizer': 'adam', 'dmodel': 768, 'nhead': 4, 'dfeedforward': 200, 'nlayers': 3, 'projector': '768-256', 'tokenizer': 'bert-base-multilingual-uncased', 'mbert_out_size': 768, 'checkpoint_dir': 'checkpoint', 'load': 0, 'checkbleu': 5, 'train': True, 'print_freq': 5, 'test_translation': 0, 'ngpus_per_node': 2, 'rank': 0, 'dist_url': 'tcp://localhost:58472', 'world_size': 2}
-2022-04-16 01:43:26,853 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/conda-environment.yaml
-2022-04-16 01:43:26,853 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/wandb-metadata.json
-2022-04-16 01:43:26,853 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_created():216] file/dir created: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:43:28,355 INFO    Thread-15 :6896 [upload_job.py:push():133] Uploaded file /tmp/tmpihia6f2xwandb/2u1coito-code/train_translation.py
-2022-04-16 01:43:28,852 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:43:28,939 INFO    Thread-14 :6896 [upload_job.py:push():133] Uploaded file /tmp/tmpihia6f2xwandb/2kqba8ii-wandb-metadata.json
-2022-04-16 01:43:29,213 INFO    Thread-22 :6896 [upload_job.py:push():133] Uploaded file /tmp/tmpihia6f2xwandb/26d72ylc-diff.patch
-2022-04-16 01:43:29,853 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/config.yaml
-2022-04-16 01:43:30,853 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:43:32,881 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:43:44,886 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:44:12,895 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:44:42,905 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:44:46,692 INFO    SenderThread:6896 [sender.py:_save_file():829] saving file wandb-summary.json with policy end
-2022-04-16 01:44:46,909 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/wandb-summary.json
-2022-04-16 01:45:00,917 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:04,918 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:06,919 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:08,920 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:10,921 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:12,921 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:14,922 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:16,926 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:18,927 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:20,928 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:22,928 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:24,929 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:26,144 INFO    SenderThread:6896 [sender.py:finish():933] shutting down sender
-2022-04-16 01:45:26,144 INFO    WriterThread:6896 [datastore.py:close():258] close: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/run-1a0lobwa.wandb
-2022-04-16 01:45:26,144 INFO    SenderThread:6896 [dir_watcher.py:finish():282] shutting down directory watcher
-2022-04-16 01:45:26,891 INFO    MainThread:6896 [wandb_run.py:_atexit_cleanup():1508] got exitcode: 255
-2022-04-16 01:45:26,892 INFO    MainThread:6896 [wandb_run.py:_restore():1480] restore
-2022-04-16 01:45:26,930 INFO    Thread-11 :6896 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log
-2022-04-16 01:45:26,930 INFO    SenderThread:6896 [dir_watcher.py:finish():312] scan: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files
-2022-04-16 01:45:26,931 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/requirements.txt requirements.txt
-2022-04-16 01:45:26,931 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/wandb-metadata.json wandb-metadata.json
-2022-04-16 01:45:26,931 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/output.log output.log
-2022-04-16 01:45:26,934 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/conda-environment.yaml conda-environment.yaml
-2022-04-16 01:45:26,938 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/wandb-summary.json wandb-summary.json
-2022-04-16 01:45:26,941 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/config.yaml config.yaml
-2022-04-16 01:45:26,941 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/diff.patch diff.patch
-2022-04-16 01:45:26,949 INFO    SenderThread:6896 [dir_watcher.py:finish():318] scan save: /home/ivlabs/context_enhancement/context_new/new/context_enhancement/wandb/run-20220416_014323-1a0lobwa/files/code/train_translation.py code/train_translation.py
-2022-04-16 01:45:26,949 INFO    SenderThread:6896 [file_pusher.py:finish():176] shutting down file pusher
-2022-04-16 01:45:26,950 INFO    SenderThread:6896 [file_pusher.py:join():181] waiting for file pusher
diff --git a/wandb/run-20220416_014323-1a0lobwa/run-1a0lobwa.wandb b/wandb/run-20220416_014323-1a0lobwa/run-1a0lobwa.wandb
deleted file mode 100644
index a79c900..0000000
Binary files a/wandb/run-20220416_014323-1a0lobwa/run-1a0lobwa.wandb and /dev/null differ